[Bio] / FigKernelPackages / DocUtils.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/DocUtils.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (view) (download) (as text)

1 : olson 1.3 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : parrello 1.1 package DocUtils;
19 :    
20 :     =head1 Sprout Documentation Utilities
21 :    
22 :     =head2 Introduction
23 :    
24 :     This module processes documentation files. It can invoke C<pod2html> to convert a PM
25 :     file to a POD document or stylize an existing HTML file.
26 :    
27 :     Stylization is automatically performed on all POD documents. It involves two separate
28 :     steps. First, it sorts the table of contents in alphabetical order; second, it inserts
29 :     a style link to a specified cascading style sheet. This makes the POD document easier
30 :     to read and easier to navigate.
31 :    
32 :     =cut
33 :    
34 :     use strict;
35 :     use Tracer;
36 :     use File::Basename;
37 :     use File::stat;
38 :    
39 :     =head2 Public Methods
40 :    
41 :     =head3 PerlDoc
42 :    
43 :     C<< DocUtils::PerlDoc($inFile, $docDirectory, $style); >>
44 :    
45 :     This method generates an HTML document for the specified PERL source file in the
46 :     specified output directory. If a style file name is specified, it will be added
47 :     as a style link. The style file name should be specified relative to the output
48 :     directory. So, for example
49 :    
50 :     C<< DocUtils::PerlDoc('lib/Sprout.pm', 'CGI/Html/pod_doc', '../Georgia.css'); >>
51 :    
52 :     would produce the POD documentation for C<Sprout.pm> in the C<lib> directory.
53 :     The output file would be C<CGI/Html/pod_doc/Sprout.html>, and it would reference
54 :     the style file C<CGI/Html/Georgia.css>.
55 :    
56 :     =over 4
57 :    
58 :     =item inFile
59 :    
60 :     Name of the PERL source file.
61 :    
62 :     =item docDirectory
63 :    
64 :     Output directory into which the HTML document should be placed.
65 :    
66 :     =item style (optional)
67 :    
68 :     If specified, the name of the style file to be used, relative to the output directory.
69 :    
70 :     =back
71 :    
72 :     =cut
73 :    
74 :     sub PerlDoc {
75 : parrello 1.5 # Get the parameters.
76 :     my ($inFile, $docDirectory, $style) = @_;
77 :     # Get the file name components.
78 :     my ($fileTitle, $inDirectory, $fileType) = fileparse($inFile, qr{\.[^.]+$});
79 :     # Check the file for POD stuff.
80 :     (open PODCHECK, $inFile) || Confess("Could not open POD input file $inFile.");
81 :     my $podFound = 0;
82 :     while (! $podFound && (my $line = <PODCHECK>)) {
83 :     if ($line =~ /^=(head|pod)/i) {
84 :     # We consider a file to have POD in it if we find a POD line or
85 :     # a header line.
86 :     $podFound = 1;
87 :     }
88 :     }
89 :     close PODCHECK;
90 :     # Check to see if we found POD stuff.
91 :     if (! $podFound) {
92 :     Trace("Module $fileTitle in $inDirectory does not have POD documentation.") if T(2);
93 :     } else {
94 :     Trace("Generating POD documentation for module $fileTitle.") if T(2);
95 :     # Here we need to create POD documentation for this input file.
96 :     # Switch to the input directory.
97 :     chdir $inDirectory;
98 :     my $result = system("pod2html --infile=$fileTitle$fileType --outfile=$fileTitle.tmp --title=$fileTitle") >> 8;
99 :     Trace("POD for $inFile returned $result.") if T(1);
100 :     # Compute the output file name. We convert the input file base name by translating
101 :     # the period to a hyphen and then suffix "html".
102 :     $fileType =~ tr'.'-';
103 :     my $outFileName = "$docDirectory/$fileTitle$fileType.html";
104 :     # Customize the result.
105 :     Stylize("$fileTitle.tmp", $style, $outFileName);
106 :     unlink "$fileTitle.tmp";
107 :     }
108 : parrello 1.1 }
109 :    
110 :     =head3 Stylize
111 :    
112 :     C<< DocUtils::Stylize($inFile, $style, $outFile); >>
113 :    
114 :     Dress up an HTML file. Dressing up the file includes putting a header line in for
115 :     the table of contents, adding a style link, and sorted the table of contents into
116 :     alphabetical order for ease of maneuvering. The Table of Contents is identified by
117 :     two HTML comment lines:
118 :    
119 :     C<< <!-- INDEX BEGIN --> >>
120 :    
121 :     C<< <!-- INDEX END --> >>
122 :    
123 :     These are automatically inserted into the output of the C<pod2html> program. The directory
124 :     is presumed to be in outline form, using C<ul> and C<li> tags. Only the lowest level of
125 :     the outline is sorted.
126 :    
127 :     =over 4
128 :    
129 :     =item inFile
130 :    
131 :     Input file name.
132 :    
133 :     =item style
134 :    
135 :     Style file name, relative to the output file's directory. If null or undefined, no style link is added.
136 :    
137 :     =item outFile
138 :    
139 :     Output file name.
140 :    
141 :     =back
142 :    
143 :     =cut
144 :    
145 :     sub Stylize {
146 : parrello 1.5 # Get the parameters.
147 :     my ($inFile, $style, $outFile) = @_;
148 :     Trace("Stylizing $inFile into $outFile.") if T(3);
149 :     # Open the files.
150 :     (open INFILE, '<', $inFile) || Confess("Could not open input file $inFile.");
151 :     (open OUTFILE, '>', $outFile) || Confess("Could not open output file $outFile.");
152 :     # Loop through the input file.
153 :     while (<INFILE>) {
154 :     # Check for one of the marker lines.
155 :     if (m!^\s*<body!i) {
156 :     # Here we have a body tag. We replace it with a null body tag and a header line
157 :     # for the table of contents.
158 :     print OUTFILE "<body>\n<h2>Table of Contents</h2>\n";
159 :     } elsif (m!^\s*</head!i) {
160 :     # Here we have the end of the heading section. We may need to insert a style link.
161 :     if ($style) {
162 :     print OUTFILE "<link href=\"$style\" rel=\"stylesheet\" type=\"text/css\">\n";
163 :     }
164 :     print OUTFILE "</head>\n";
165 :     } elsif (m/^<!-- INDEX BEGIN/) {
166 :     # Here we have the start of a POD index. We need to sort the sections.
167 :     # Read the whole index into a buffer.
168 :     Trace("Buffering table of contents.") if T(4);
169 :     my @buffer = ();
170 :     my $endFound = 0;
171 :     while (!$endFound && (my $record = <INFILE>)) {
172 :     if (m/^<!-- INDEX END/) {
173 :     $endFound = 1;
174 :     } else {
175 :     push @buffer, $record;
176 :     }
177 :     }
178 :     # Sort the buffer.
179 :     Trace("Sorting table of contents.") if T(4);
180 :     my @sortedBuffer = SortIndexes(@buffer);
181 :     # Write it back out.
182 :     for my $record (@sortedBuffer) {
183 :     print OUTFILE $record;
184 :     }
185 :     Trace("Sort complete.") if T(4);
186 :     } else {
187 :     # Here we have a normal line, so we write it unmodified.
188 :     print OUTFILE $_;
189 :     }
190 :     }
191 :     # Close the files.
192 :     close INFILE;
193 :     close OUTFILE;
194 :     Trace("Stylized file is at $outFile.") if T(4);
195 : parrello 1.1 }
196 :    
197 : parrello 1.6 =head3 ModifyConfigFile
198 :    
199 :     C<< DocUtils::ModifyConfigFile($targetFile, \%changes); >>
200 :    
201 :     Modify the contents of a PERL configuration file. A PERL configuration file contains a
202 :     C<package> statement followed by a set of assignments having the form
203 :    
204 :     $var_name = "string";
205 :    
206 :     with optional comments. The caller passes in a hash keyed by variable name, and the
207 :     configuration file will be updated to insure the variables mentioned in the hash have
208 :     the associated value in the specified configuration file. If the variables in the hash
209 :     already exist in the file, they will be replaced. If they do not exist they will be
210 :     added before the first line beginning with C<1;>.
211 :    
212 :     =over 4
213 :    
214 :     =item targetFile
215 :    
216 :     Name of the configuration file to be changed.
217 :    
218 :     =item changes
219 :    
220 :     Reference to a hash mapping variable names to string values.
221 :    
222 :     =back
223 :    
224 :     =cut
225 :     #: Return Type ;
226 :     sub ModifyConfigFile {
227 :     # Get the parameters.
228 :     my ($targetFile, $changes) = @_;
229 :     # Insure the target file exists.
230 :     if (! -e $targetFile) {
231 :     Confess("Configuration file $targetFile not found in ModifyConfigFile.");
232 :     } else {
233 :     # Create a temporary file name from the target file name.
234 :     my $tempFile = "$targetFile~";
235 :     # Create a hash for tracking variable names used.
236 :     my %varHash = ();
237 :     # Open the target file for input and the temp file for output.
238 :     Open(\*CONFIGIN, "<$targetFile");
239 :     Open(\*CONFIGOUT, ">$tempFile");
240 :     # Read through the target file.
241 :     while (my $line = <CONFIGIN>) {
242 :     # Parse the input line. Note we look for the longest possible string value
243 :     # that does not extend into the comment field.
244 :     if ($line =~ /^\s*\$(\S+)\s*=\s*"([^#]*)";\s+(.*)$/) {
245 :     # Get the variable name and the value string.
246 :     my ($varName, $value, $comment) = ($1, $2, $3);
247 :     # See if this variable name has a new value.
248 :     if (exists $changes->{$varName}) {
249 :     # Get the new value.
250 :     $value = $changes->{$varName};
251 :     # Denote it's been used.
252 :     $varHash{$varName} = 1;
253 :     }
254 :     # Write out the assignment statement.
255 :     my $newLine = _BuildAssignment($varName, $value, $comment);
256 :     print CONFIGOUT $newLine;
257 :     } elsif ($line =~ /^1;/) {
258 :     # This is the end line, so we write out the rest of the variables.
259 :     for my $varName (keys %{$changes}) {
260 :     # Find out if this variable has already been seen.
261 :     if (! exists $varHash{$varName}) {
262 :     # It hasn't been seen, so we need to add it to the output.
263 :     my $value = $changes->{$varName};
264 :     my $newLine = _BuildAssignment($varName, $value, "");
265 :     print CONFIGOUT $newLine;
266 :     }
267 :     }
268 :     } else {
269 :     # Here the line doesn't parse, so we write it unmodified.
270 :     print CONFIGOUT $line;
271 :     }
272 :     }
273 :     # Close the files and rename the output file so it overwrites the input file.
274 :     close CONFIGIN;
275 :     close CONFIGOUT;
276 :     rename $tempFile, $targetFile;
277 :     }
278 :     }
279 :    
280 : parrello 1.1 =head3 Augment
281 :    
282 :     C<< DocUtils::Augment($inFile, $outDirectory, @statements); >>
283 :    
284 :     Augment a PERL script file by adding a set of pre-defined statements. The statements
285 :     will be added immediately after the shebang line, if one is present. Otherwise they will
286 :     be added to the beginning of the file. The augmented file will have the same name
287 :     as the original file but will be placed in the specified output directory.
288 :    
289 :     =over 4
290 :    
291 :     =item inFile
292 :    
293 :     Name of the input file.
294 :    
295 :     =item outDirectory
296 :    
297 :     Name of the directory to contain the output file.
298 :    
299 :     =item libs
300 :    
301 :     Statements to be added to the output file.
302 :    
303 :     =back
304 :    
305 :     =cut
306 :    
307 :     sub Augment {
308 : parrello 1.5 # Get the parameters.
309 :     my ($inFile, $outDirectory, @statements) = @_;
310 :     # Get the input file name components.
311 :     my ($fileName, $inDirectory) = fileparse($inFile);
312 :     # Construct the output file name.
313 :     my $outFile = "$outDirectory/$fileName";
314 :     # Open the input and output files.
315 :     (open INFILE, '<', $inFile) || Confess("Could not open input file $inFile.");
316 :     (open OUTFILE, '>', $outFile) || Confess("Could not open output file $outFile.");
317 :     # Get the first input line.
318 :     my $line = <INFILE>;
319 :     # If it's a shebang and we have statements to insert, echo
320 :     # it out and save a blank line for later.
321 :     if ($#statements >= 0 && $line =~ /#!/) {
322 :     print OUTFILE $line;
323 :     $line = "\n";
324 :     }
325 :     # Write out the augmenting statements.
326 :     for my $statement (@statements) {
327 :     print OUTFILE "$statement\n";
328 :     }
329 :     # Echo the saved line.
330 :     print OUTFILE $line;
331 :     # Spin out the rest of the file.
332 :     while ($line = <INFILE>) {
333 :     # If we're in PERL mode, we need to check for a duplicate line.
334 :     print OUTFILE $line;
335 :     }
336 :     # Close both files.
337 :     close INFILE;
338 :     close OUTFILE;
339 : parrello 1.1 }
340 :    
341 :     =head3 GetDirectory
342 :    
343 :     C<< my $fileHash = DocUtils::GetDirectory($directoryName); >>
344 :    
345 :     Get a list of the files in the specified directory. The files will be returned as
346 :     a hash of lists. The hash will map the various file extensions to the corresponding
347 :     file titles. So, for example, if the directory contained C<Sprout.pm>, C<DocUtils.pl>,
348 :     C<Tracer.pm>, C<Genome.pm>, and C<Makefile>, the hash returned would be
349 :    
350 :     C<< ( pm => ['Sprout', 'Tracer', 'Genome'], pl => ['DocUtils'], '' => ['Makefile'] ) >>
351 :    
352 :     =over 4
353 :    
354 :     =item directoryName
355 :    
356 :     Name of the directory whose files are desired.
357 :    
358 :     =item RETURN
359 :    
360 :     Returns a reference to a hash mapping each file extension to a list of the titles
361 :     of files having that extension.
362 :    
363 :     =back
364 :    
365 :     =cut
366 :    
367 :     sub GetDirectory {
368 : parrello 1.5 # Get the parameter.
369 :     my ($directoryName) = @_;
370 :     # Create the return hash.
371 :     my %retVal = ();
372 :     # Open the directory and read in the file names.
373 :     (opendir INDIR, $directoryName) || Confess("Could not open directory $directoryName.");
374 :     my @fileNames = readdir INDIR;
375 :     # Create the variables for holding the file titles and extensions.
376 :     my ($ext, $title);
377 :     # Loop through the files.
378 :     for my $fileName (@fileNames) {
379 :     # Separate the file name into a title and an extension.
380 :     if ($fileName =~ /^\./) {
381 :     # Ignore filenames that start with a period.
382 :     } elsif ($fileName =~ /(.+)\.([^.]*)$/) {
383 :     ($title, $ext) = ($1, $2);
384 :     # Add the file's data into the hash.
385 :     Tracer::AddToListMap(\%retVal, $ext, $title);
386 :     } elsif ($fileName) {
387 :     # Here the file name does not have an extension. Note that null filenames and
388 :     # the various hidden files are skipped.
389 :     ($title, $ext) = ($fileName, '');
390 :     # Add the file's data into the hash.
391 :     Tracer::AddToListMap(\%retVal, $ext, $title);
392 :     }
393 :     }
394 :     # Return the result hash.
395 :     return \%retVal;
396 : parrello 1.1 }
397 :    
398 :     =head3 CheckFile
399 :    
400 : parrello 1.2 C<< my $updated = DocUtils::CheckFile($inFile, $outDirectory, $suffix); >>
401 : parrello 1.1
402 :     This method compares the modification date of a specified file against the
403 :     date of a similarly-named file in the specified output directory. It
404 :     returns TRUE if the matching output file does not exist or has been
405 :     modified since the last modification of the matching file in the output
406 :     directory.
407 :    
408 :     =over 4
409 :    
410 :     =item inFile
411 :    
412 :     Name of the input file.
413 :    
414 :     =item outDirectory
415 :    
416 :     Directory to contain the output file.
417 :    
418 : parrello 1.2 =item suffix (optional)
419 :    
420 :     If specified, a suffix to be added to the input file name to create the
421 :     output file name.
422 :    
423 : parrello 1.1 =back
424 :    
425 :     =cut
426 :    
427 :     sub CheckFile {
428 : parrello 1.5 # Get the parameters.
429 :     my ($inFile, $outDirectory, $suffix) = @_;
430 :     # Create the return variable.
431 :     my $retVal = 1;
432 :     # Get the name of the output file.
433 :     my ($fileTitle, $inDirectory) = fileparse($inFile);
434 :     my $outFileName = "$outDirectory/$fileTitle";
435 :     if ($suffix) {
436 :     $outFileName .= $suffix;
437 :     }
438 :     # Check to see if the output file exists.
439 :     if (-e $outFileName) {
440 :     # Get the input and output modify times.
441 :     my $inTime = stat($inFile)->mtime;
442 :     my $outTime = stat($outFileName)->mtime;
443 :     # If the output file is newer, return FALSE.
444 :     if ($outTime >= $inTime) {
445 :     $retVal = 0;
446 :     }
447 :     }
448 :     # Return the determination indicator.
449 :     return $retVal;
450 : parrello 1.1 }
451 :    
452 :     =head2 Internal Utilities
453 :    
454 :     =head3 SortIndexes
455 :    
456 :     Sort a POD index. The index is provided as a list and is returned as a list.
457 :     The index itself consists of <ul>, </ul>, and <li> lines. Each <ul> adds an
458 :     indent level and each </ul> subtracts one. Only the <li> lines at the lowest
459 :     level are sorted. This is not very general-purpose, but it is enough to handle
460 :     the output from POD and the ERDB module.
461 :    
462 :     This is a static method.
463 :    
464 :     =over 4
465 :    
466 :     =item buffer
467 :    
468 :     List of lines making up the index.
469 :    
470 :     =item RETURN
471 :    
472 :     A list of the same lines in a more useful order.
473 :    
474 :     =back
475 :    
476 :     =cut
477 :    
478 :     sub SortIndexes {
479 : parrello 1.5 # Get the list of lines to sort.
480 :     my @buffer = @_;
481 :     # Declare the output array.
482 :     my @retVal = ();
483 :     # Now we need to read through the buffer. At all times, we remember the
484 :     # location of the first <li> line in the current group. If a <ul> line is
485 :     # found, the current group is written to the output array unmodified and
486 :     # a new group starts. If a </ul> line is found, the current group is written
487 :     # to the output array after sorting. Thereafter, there is no current group
488 :     # until we find another <ul> line.
489 :     my @currentGroup = ();
490 :     # Denote we're not currently inside a group.
491 :     my $inGroup = 0;
492 :     # Denote we're not looking for the first line of a group.
493 :     my $looking = 0;
494 :     for my $record (@buffer) {
495 :     # Determine the type of record.
496 :     if ($record =~ m!^\s*$!) {
497 :     # Blank lines are ignored.
498 :     } elsif ($record =~ m!<ul>!) {
499 :     # Here we have the start of a new list. Denote we're
500 :     # no longer inside a group but we're looking for a
501 :     # new one.
502 :     push @currentGroup, $record;
503 :     $inGroup = 0;
504 :     $looking = 1;
505 :     } elsif ($record =~ m!</ul>!) {
506 :     # Here we have the end of a list. If we're inside a group,
507 :     # we sort it and call for a flush.
508 :     if ($inGroup) {
509 :     my @sortedGroup = sort @currentGroup;
510 :     push @sortedGroup, $record;
511 :     @currentGroup = @sortedGroup;
512 :     $inGroup = 0;
513 :     $looking = 0;
514 :     } else {
515 :     # Otherwise we echo the record.
516 :     push @retVal, $record;
517 :     }
518 :     } else {
519 :     # Here we have a line item. If we're looking, we start a group.
520 :     # if we're in a group, we continue the group. Otherwise, we
521 :     # echo the record.
522 :     if ($looking) {
523 :     @currentGroup = ($record);
524 :     $inGroup = 1;
525 :     $looking = 0;
526 :     } elsif ($inGroup) {
527 :     push @currentGroup, $record;
528 :     } else {
529 :     push @retVal, $record;
530 :     }
531 :     }
532 :     # If we're not in a group and we have records, we must flush them.
533 :     if (@currentGroup > 0 && !$inGroup) {
534 :     push @retVal, @currentGroup;
535 :     @currentGroup = ();
536 :     }
537 :     }
538 :     # Return the modified list.
539 :     return @retVal;
540 : parrello 1.1 }
541 :    
542 : parrello 1.6 =head2 Private Utilities
543 :    
544 :     =head3 _BuildAssignment
545 :    
546 :     C<< my $statement = _BuildAssignment($varName, $value, $comment); >>
547 :    
548 :     Create an assignment statement out of the specified components.
549 :    
550 :     =over 4
551 :    
552 :     =item varName
553 :    
554 :     Variable name.
555 :    
556 :     =item value
557 :    
558 :     Value to be assigned to the variable (will be quoted).
559 :    
560 :     =item comment
561 :    
562 :     Comments or trailing characters.
563 :    
564 :     =back
565 :    
566 :     =cut
567 :     #: Return Type $;
568 :     sub _BuildAssignment {
569 :     # Get the parameters.
570 :     my ($varName, $value, $comment) = @_;
571 :     # Pad the variable name.
572 :     my $varPad = Tracer::Pad($varName, 30);
573 :     # Return the assignment statement.
574 :     my $retVal = '$' . "$varPad = \"$value\"; $comment\n";
575 :     return $retVal;
576 :     }
577 :    
578 : golsen 1.4 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3