[Bio] / FigKernelPackages / DocUtils.pm Repository:
ViewVC logotype

View of /FigKernelPackages/DocUtils.pm

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.9 - (download) (as text) (annotate)
Sun Jul 23 17:47:27 2006 UTC (13 years, 8 months ago) by parrello
Branch: MAIN
Changes since 1.8: +14 -4 lines
Added capability to the ModifyConfigFile that inserts assignments near the top of the file. The variables used in these assignments can be referred to in subsequent assignments.

# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
# This file is part of the SEED Toolkit.
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.

package DocUtils;

=head1 Sprout Documentation Utilities

=head2 Introduction

This module processes documentation files. It can invoke C<pod2html> to convert a PM
file to a POD document or stylize an existing HTML file.

Stylization is automatically performed on all POD documents. It involves two separate
steps. First, it sorts the table of contents in alphabetical order; second, it inserts 
a style link to a specified cascading style sheet. This makes the POD document easier
to read and easier to navigate.


use strict;
use Tracer;
use File::Basename;
use File::stat;

=head2 Public Methods

=head3 PerlDoc

C<< my $newName = DocUtils::PerlDoc($inFile, $docDirectory, $style); >>

This method generates an HTML document for the specified PERL source file in the
specified output directory. If a style file name is specified, it will be added
as a style link. The style file name should be specified relative to the output
directory. So, for example

C<< DocUtils::PerlDoc('lib/Sprout.pm', 'CGI/Html/pod_doc', '../Georgia.css'); >>

would produce the POD documentation for C<Sprout.pm> in the C<lib> directory.
The output file would be C<CGI/Html/pod_doc/Sprout-pm.html>, and it would reference
the style file C<CGI/Html/Georgia.css>.

=over 4

=item inFile

Name of the PERL source file.

=item docDirectory

Output directory into which the HTML document should be placed.

=item style (optional)

If specified, the name of the style file to be used, relative to the output directory.

=item RETURN

Returns the file name given to the documentation file.



sub PerlDoc {
    # Get the parameters.
    my ($inFile, $docDirectory, $style) = @_;
    # Declare the return variable.
    my $retVal;
    # Get the file name components.
    my ($fileTitle, $inDirectory, $fileType) = fileparse($inFile, qr{\.[^.]+$});
    # Check the file for POD stuff.
    (open PODCHECK, $inFile) || Confess("Could not open POD input file $inFile.");
    my $podFound = 0;
    while (! $podFound && (my $line = <PODCHECK>)) {
        if ($line =~ /^=(head|pod)/i) {
            # We consider a file to have POD in it if we find a POD line or
            # a header line.
            $podFound = 1;
    close PODCHECK;
    # Check to see if we found POD stuff.
    if (! $podFound) {
        Trace("Module $fileTitle in $inDirectory does not have POD documentation.") if T(2);
    } else {
        Trace("Generating POD documentation for module $fileTitle.") if T(2);
        # Here we need to create POD documentation for this input file.
        # Switch to the input directory.
        chdir $inDirectory;
        my $result = system("pod2html --infile=$fileTitle$fileType --outfile=$fileTitle.tmp --title=$fileTitle") >> 8;
        Trace("POD for $inFile returned $result.") if T(1);
        # Compute the output file name. We convert the input file base name by translating
        # the period to a hyphen and then suffix "html".
        $fileType =~ tr'.'-';
        $retVal = "$fileTitle$fileType.html";
        my $outFileName = "$docDirectory/$retVal";
        # Customize the result.
        Stylize("$fileTitle.tmp", $style, $outFileName);
        unlink "$fileTitle.tmp";
    # Return the file title.
    return $retVal;

=head3 Stylize

C<< DocUtils::Stylize($inFile, $style, $outFile); >>

Dress up an HTML file. Dressing up the file includes putting a header line in for
the table of contents, adding a style link, and sorted the table of contents into
alphabetical order for ease of maneuvering. The Table of Contents is identified by
two HTML comment lines:

C<< <!-- INDEX BEGIN --> >>

C<< <!-- INDEX END --> >>

These are automatically inserted into the output of the C<pod2html> program. The directory
is presumed to be in outline form, using C<ul> and C<li> tags. Only the lowest level of
the outline is sorted.

=over 4

=item inFile

Input file name.

=item style

Style file name, relative to the output file's directory. If null or undefined, no style link is added.

=item outFile

Output file name.



sub Stylize {
    # Get the parameters.
    my ($inFile, $style, $outFile) = @_;
    Trace("Stylizing $inFile into $outFile.") if T(3);
    # Open the files.
    (open INFILE, '<', $inFile) || Confess("Could not open input file $inFile.");
    (open OUTFILE, '>', $outFile) || Confess("Could not open output file $outFile.");
    # Loop through the input file.
    while (<INFILE>) {
        # Check for one of the marker lines.
        if (m!^\s*<body!i) {
            # Here we have a body tag. We replace it with a null body tag and a header line
            # for the table of contents.
            print OUTFILE "<body>\n<h2>Table of Contents</h2>\n";
        } elsif (m!^\s*</head!i) {
            # Here we have the end of the heading section. We may need to insert a style link.
            if ($style) {
                print OUTFILE "<link href=\"$style\" rel=\"stylesheet\" type=\"text/css\">\n";
            print OUTFILE "</head>\n";
        } elsif (m/^<!-- INDEX BEGIN/) {
            # Here we have the start of a POD index. We need to sort the sections.
            # Read the whole index into a buffer.
            Trace("Buffering table of contents.") if T(4);
            my @buffer = ();
            my $endFound = 0;
            while (!$endFound && (my $record = <INFILE>)) {
                if (m/^<!-- INDEX END/) {
                    $endFound = 1;
                } else {
                    push @buffer, $record;
            # Sort the buffer.
            Trace("Sorting table of contents.") if T(4);
            my @sortedBuffer = SortIndexes(@buffer);
            # Write it back out.
            for my $record (@sortedBuffer) {
                print OUTFILE $record;
            Trace("Sort complete.") if T(4);
        } else {
            # Here we have a normal line, so we write it unmodified.
            print OUTFILE $_;
    # Close the files.
    close INFILE;
    close OUTFILE;
    Trace("Stylized file is at $outFile.") if T(4);

=head3 ModifyConfigFile

C<< DocUtils::ModifyConfigFile($targetFile, \%changes, \@inserts); >>

Modify the contents of a PERL configuration file. A PERL configuration file contains a
C<package> statement followed by a set of assignments having the form

    $var_name = "string";

with optional comments. The caller passes in a hash keyed by variable name, and the
configuration file will be updated to insure the variables mentioned in the hash have
the associated value in the specified configuration file. If the variables in the hash
already exist in the file, they will be replaced. If they do not exist they will be
added before the first line beginning with C<1;>.

=over 4

=item targetFile

Name of the configuration file to be changed.

=item changes

Reference to a hash mapping variable names to string values.

=item inserts

Reference to a list of lines to be inserted at the beginning.


#: Return Type ;
sub ModifyConfigFile {
    # Get the parameters.
    my ($targetFile, $changes, $inserts) = @_;
    # Insure the target file exists.
    if (! -e $targetFile) {
        Confess("Configuration file $targetFile not found in ModifyConfigFile.");
    } else {
        # Create a temporary file name from the target file name.
        my $tempFile = "$targetFile~";
        # Create a hash for tracking variable names used.
        my %varHash = ();
        # Open the target file for input and the temp file for output.
        Open(\*CONFIGIN, "<$targetFile");
        Open(\*CONFIGOUT, ">$tempFile");
        # Denote we haven't found a trailer line.
        my $oneFound = 0;
        # Read through the target file.
        while (my $line = <CONFIGIN>) {
            # Parse the input line. Note we look for the longest possible string value
            # that does not extend into the comment field.
            if ($line =~ /^\s*\$(\S+)\s*=\s*"([^#]*)";\s+(.*)$/) {
                # Get the variable name and the value string.
                my ($varName, $value, $comment) = ($1, $2, $3);
                # See if this variable name has a new value.
                if (exists $changes->{$varName}) {
                    # Get the new value.
                    $value = $changes->{$varName};
                    # Denote it's been used.
                    $varHash{$varName} = 1;
                # Write out the assignment statement.
                my $newLine = _BuildAssignment($varName, $value, $comment);
                print CONFIGOUT $newLine;
            } elsif ($line =~ /^1;/) {
                # This is the end line, so we write out the rest of the variables.
                my $lineCount = 0;
                for my $varName (keys %{$changes}) {
                    # Find out if this variable has already been seen.
                    if (! exists $varHash{$varName}) {
                        # It hasn't been seen, so we need to add it to the output.
                        my $value = $changes->{$varName};
                        my $newLine = _BuildAssignment($varName, $value, "");
                        Trace("Adding new value for $varName to config file.") if T(3);
                        print CONFIGOUT $newLine;
                Trace("$lineCount lines updated in config file.") if T(2);
                # Write out the end line.
                print CONFIGOUT "1;\n";
                # Denote we found it.
                $oneFound = 1;
            } elsif ($line =~ /package\s/i) {
                # Here we have a package statement. We write it out followed by the
                # insert lines.
                print CONFIGOUT $line;
                # Only proceed if insert lines were specified.
                if (defined $inserts) {
                    foreach my $insert (@{$inserts}) {
                        print CONFIGOUT "$insert\n";
            } else {
                # Here the line doesn't parse, so we write it unmodified.
                print CONFIGOUT $line;
        # Complain if we didn't find a trailer.
        if (! $oneFound) {
            Confess("No trailer (1;) found in FIG_Config.pm.");
        } else {
            # Close the files and rename the output file so it overwrites the input file.
            close CONFIGIN;
            close CONFIGOUT;
            rename $tempFile, $targetFile;

=head3 Augment

C<< DocUtils::Augment($inFile, $outDirectory, @statements); >>

Augment a PERL script file by adding a set of pre-defined statements. The statements
will be added immediately after the shebang line, if one is present. Otherwise they will
be added to the beginning of the file. The augmented file will have the same name
as the original file but will be placed in the specified output directory.

=over 4

=item inFile

Name of the input file.

=item outDirectory

Name of the directory to contain the output file.

=item libs

Statements to be added to the output file.



sub Augment {
    # Get the parameters.
    my ($inFile, $outDirectory, @statements) = @_;
    # Get the input file name components.
    my ($fileName, $inDirectory) = fileparse($inFile);
    # Construct the output file name.
    my $outFile = "$outDirectory/$fileName";
    # Open the input and output files.
    (open INFILE, '<', $inFile) || Confess("Could not open input file $inFile.");
    (open OUTFILE, '>', $outFile) || Confess("Could not open output file $outFile.");
    # Get the first input line.
    my $line = <INFILE>;
    # If it's a shebang and we have statements to insert, echo
    # it out and save a blank line for later.
    if ($#statements >= 0 && $line =~ /#!/) {
        print OUTFILE $line;
        $line = "\n";
    # Write out the augmenting statements.
    for my $statement (@statements) {
        print OUTFILE "$statement\n";
    # Echo the saved line.
    print OUTFILE $line;
    # Spin out the rest of the file.
    while ($line = <INFILE>) {
        # If we're in PERL mode, we need to check for a duplicate line.
        print OUTFILE $line;
    # Close both files.
    close INFILE;
    close OUTFILE;

=head3 GetDirectory

C<< my $fileHash = DocUtils::GetDirectory($directoryName); >>

Get a list of the files in the specified directory. The files will be returned as
a hash of lists. The hash will map the various file extensions to the corresponding
file titles. So, for example, if the directory contained C<Sprout.pm>, C<DocUtils.pl>,
C<Tracer.pm>, C<Genome.pm>, and C<Makefile>, the hash returned would be

C<< ( pm => ['Sprout', 'Tracer', 'Genome'], pl => ['DocUtils'], '' => ['Makefile'] ) >>

=over 4

=item directoryName

Name of the directory whose files are desired.

=item RETURN

Returns a reference to a hash mapping each file extension to a list of the titles 
of files having that extension.



sub GetDirectory {
    # Get the parameter.
    my ($directoryName) = @_;
    # Create the return hash.
    my %retVal = ();
    # Open the directory and read in the file names.
    (opendir INDIR, $directoryName) || Confess("Could not open directory $directoryName.");
    my @fileNames = readdir INDIR;
    # Create the variables for holding the file titles and extensions.
    my ($ext, $title);
    # Loop through the files.
    for my $fileName (@fileNames) {
        # Separate the file name into a title and an extension.
        if ($fileName =~ /^\./) {
            # Ignore filenames that start with a period.
        } elsif ($fileName =~ /(.+)\.([^.]*)$/) {
            ($title, $ext) = ($1, $2);
            # Add the file's data into the hash.
            Tracer::AddToListMap(\%retVal, $ext, $title);
        } elsif ($fileName) {
            # Here the file name does not have an extension. Note that null filenames and
            # the various hidden files are skipped.
            ($title, $ext) = ($fileName, '');
            # Add the file's data into the hash.
            Tracer::AddToListMap(\%retVal, $ext, $title);
    # Return the result hash.
    return \%retVal;

=head3 CheckFile

C<< my $updated = DocUtils::CheckFile($inFile, $outDirectory, $suffix); >>

This method compares the modification date of a specified file against the
date of a similarly-named file in the specified output directory. It
returns TRUE if the matching output file does not exist or has been
modified since the last modification of the matching file in the output

=over 4

=item inFile

Name of the input file.

=item outDirectory

Directory to contain the output file.

=item suffix (optional)

If specified, a suffix to be added to the input file name to create the
output file name.



sub CheckFile {
    # Get the parameters.
    my ($inFile, $outDirectory, $suffix) = @_;
    # Create the return variable.
    my $retVal = 1;
    # Get the name of the output file.
    my ($fileTitle, $inDirectory) = fileparse($inFile);
    my $outFileName = "$outDirectory/$fileTitle";
    if ($suffix) {
        $outFileName .= $suffix;
    # Check to see if the output file exists.
    if (-e $outFileName) {
        # Get the input and output modify times.
        my $inTime = stat($inFile)->mtime;
        my $outTime = stat($outFileName)->mtime;
        # If the output file is newer, return FALSE.
        if ($outTime >= $inTime) {
            $retVal = 0;
    # Return the determination indicator.
    return $retVal;

=head2 Internal Utilities

=head3 SortIndexes

Sort a POD index. The index is provided as a list and is returned as a list.
The index itself consists of <ul>, </ul>, and <li> lines. Each <ul> adds an
indent level and each </ul> subtracts one. Only the <li> lines at the lowest 
level are sorted. This is not very general-purpose, but it is enough to handle
the output from POD and the ERDB module.

This is a static method.

=over 4

=item buffer

List of lines making up the index.

=item RETURN

A list of the same lines in a more useful order.



sub SortIndexes {
    # Get the list of lines to sort.
    my @buffer = @_;
    # Declare the output array.
    my @retVal = ();
    # Now we need to read through the buffer. At all times, we remember the
    # location of the first <li> line in the current group. If a <ul> line is
    # found, the current group is written to the output array unmodified and
    # a new group starts. If a </ul> line is found, the current group is written
    # to the output array after sorting. Thereafter, there is no current group
    # until we find another <ul> line.
    my @currentGroup = ();
    # Denote we're not currently inside a group.
    my $inGroup = 0;
    # Denote we're not looking for the first line of a group.
    my $looking = 0;
    for my $record (@buffer) {
        # Determine the type of record.
        if ($record =~ m!^\s*$!) {
            # Blank lines are ignored.
        } elsif ($record =~ m!<ul>!) {
            # Here we have the start of a new list. Denote we're
            # no longer inside a group but we're looking for a
            # new one.
            push @currentGroup, $record;
            $inGroup = 0;
            $looking = 1;
        } elsif ($record =~ m!</ul>!) {
            # Here we have the end of a list. If we're inside a group,
            # we sort it and call for a flush.
            if ($inGroup) {
                my @sortedGroup = sort @currentGroup;
                push @sortedGroup, $record;
                @currentGroup = @sortedGroup;
                $inGroup = 0;
                $looking = 0;
            } else {
                # Otherwise we echo the record.
                push @retVal, $record;
        } else {
            # Here we have a line item. If we're looking, we start a group.
            # if we're in a group, we continue the group. Otherwise, we
            # echo the record.
            if ($looking) {
                @currentGroup = ($record);
                $inGroup = 1;
                $looking = 0;
            } elsif ($inGroup) {
                push @currentGroup, $record;
            } else {
                push @retVal, $record;
        # If we're not in a group and we have records, we must flush them.
        if (@currentGroup > 0 && !$inGroup) {
            push @retVal, @currentGroup;
            @currentGroup = ();
    # Return the modified list.
    return @retVal;

=head2 Private Utilities

=head3 _BuildAssignment

C<< my $statement = _BuildAssignment($varName, $value, $comment); >>

Create an assignment statement out of the specified components.

=over 4

=item varName

Variable name.

=item value

Value to be assigned to the variable (will be quoted).

=item comment

Comments or trailing characters.


#: Return Type $;
sub _BuildAssignment {
    # Get the parameters.
    my ($varName, $value, $comment) = @_;
    # Pad the variable name.
    my $varPad = Tracer::Pad($varName, 30);
    # Return the assignment statement.
    my $retVal = '$' . "$varPad = \"$value\"; $comment\n";
    return $retVal;


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3