[Bio] / Sprout / NewStuffCheck.pl Repository:
ViewVC logotype

Diff of /Sprout/NewStuffCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.10, Thu Aug 24 21:17:08 2006 UTC revision 1.26, Tue Feb 5 05:15:42 2008 UTC
# Line 3  Line 3 
3  =head1 New Stuff Checker  =head1 New Stuff Checker
4    
5  This script compares the genomes, features, and annotations in  This script compares the genomes, features, and annotations in
6  the old and new sprouts and lists the differences.  the old and new sprouts and lists the differences and produces
7    a report in HTML. The output is an HTML fragment, not an entire
8    web page. This is because we expect it to be included in another
9    page.
10    
11  The currently-supported command-line options are as follows.  The currently-supported command-line options for NewStuffCheck are as follows.
12    
13  =over 4  =over 4
14    
 =item summary  
   
 Do not display details, only difference summaries.  
   
 =item nofeats  
   
 Do not process features.  
   
15  =item user  =item user
16    
17  Name suffix to be used for log files. If omitted, the PID is used.  Name suffix to be used for log files. If omitted, the PID is used.
# Line 47  Line 42 
42    
43  Phone number to message when the script is complete.  Phone number to message when the script is complete.
44    
45    =item groupFile
46    
47    Name of the group file (described below). The default is C<groups.tbl>
48    in the Sprout data directory.
49    
50    =item outFile
51    
52    Output file name. The default is C<html/includes/diff.inc> in the
53    nmpdr C<next> directory.
54    
55    =item orgFile
56    
57    Output file for the genome report. The default is C<html/includes/genomes.inc> in
58    the nmpdr C<next> directory.
59    
60    =back
61    
62    =head2 The Group File
63    
64    A key data file for this process is C<groups.tbl>. This file is kept in the
65    Sprout Data directory, and contains the following columns:
66    
67    =over 4
68    
69    =item name
70    
71    Name of the group.
72    
73    =item page
74    
75    Name of the group's page on the web site (e.g. C<campy.php> for
76    Campylobacter)
77    
78    =item genus
79    
80    Genus of the group
81    
82    =item species
83    
84    Species of the group, or an empty string if the group is for an entire
85    genus. If the group contains more than one species, the species names
86    should be separated by commas.
87    
88  =back  =back
89    
90  =cut  =cut
91    
92  use strict;  use strict;
93  use Tracer;  use Tracer;
 use DocUtils;  
 use TestUtils;  
94  use Cwd;  use Cwd;
95  use File::Copy;  use File::Copy;
96  use File::Path;  use File::Path;
97  use FIG;  use FIG;
98  use SFXlate;  use SFXlate;
99  use Sprout;  use Sprout;
100    use CGI;
101    use FIGRules;
102    
103  # Get the command-line options and parameters.  # Get the command-line options and parameters.
104  my ($options, @parameters) = StandardSetup([qw(Sprout) ],  my ($options, @parameters) = StandardSetup([qw(Sprout) ],
105                                             {                                             {
106                                                nofeats => ["", "if specified, only genome changes will be displayed; otherwise, genome features will be compared and differences shown"],                                                groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"],
107                                                trace => ["2-", "tracing level; use a minus to prevent tracing to standard output"],                                                trace => ["2", "tracing level; use a minus to prevent tracing to standard output"],
                                               summary => ["", "if specified, detailed lists of the different items will not be displayed"],  
108                                                phone => ["", "phone number (international format) to call when load finishes"],                                                phone => ["", "phone number (international format) to call when load finishes"],
109                                                  outFile => ["$FIG_Config::nmpdr_base/next/html/includes/diff.inc", "output file for the difference report"],
110                                                  orgFile => ["$FIG_Config::nmpdr_base/next/html/includes/genomes.inc", "output file for the genome report"],
111                                             },                                             },
112                                             "",                                             "",
113                                             @ARGV);                                             @ARGV);
# Line 76  Line 115 
115  my $rtype;  my $rtype;
116  # Insure we catch errors.  # Insure we catch errors.
117  eval {  eval {
118        # Get a CGI object for building the output. We pass it the options hash so
119        # the formatting subroutines have access to it. Also, we want it to know
120        # we're not a real web script.
121        my $cgi = CGI->new($options);
122        # Start accumulating HTML data.
123        my @html = ();
124        # Open the output file. We do this early in case there's a problem.
125        my $outFileName = $options->{outFile};
126        Trace("Opening output file $outFileName.") if T(2);
127        Open(\*OUTPUT, ">$outFileName");
128        # Get a nice-looking version name and make it into a title.
129        my $version = uc $FIG_Config::nmpdr_version;
130        $version =~ tr/_/ /;
131        push @html, $cgi->h4({align => "center"}, "Difference Report for $version");
132        # Start the table.
133        push @html, $cgi->start_table({align => "center", border => "2"});
134        # Get the group file.
135        my $groupFileName = $options->{groupFile};
136        Trace("Reading group file $groupFileName.") if T(2);
137      Trace("Processing genomes.") if T(2);      Trace("Processing genomes.") if T(2);
138      # Get the current SEED.      # Get the current SEED.
139      my $fig = FIG->new();      my $fig = FIG->new();
140      # Get the old Sprout.      # Get the old Sprout.
141      my $oldSprout = SFXlate->new_sprout_only($FIG_Config::oldSproutDB);      my $oldSprout = SFXlate->old_sprout_only();
142      # Get its genomes in alphabetical order.      # Get its genomes in alphabetical order.
143      my @oldGenomes = GetGenomes($oldSprout);      my @oldGenomes = GetGenomes($oldSprout);
144      # Get the new Sprout.      # Get the new Sprout.
# Line 89  Line 147 
147      my @newGenomes = GetGenomes($newSprout);      my @newGenomes = GetGenomes($newSprout);
148      # Compare the two genomes lists.      # Compare the two genomes lists.
149      my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes);      my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes);
150      # Add feature counts to the new genomes.      # Get the super-group data.
151        my %superTable = $newSprout->CheckGroupFile();
152        # Create a list for the new genomes that includes BBH and feature counts. We'll flip this
153        # lists so that the genome names are first and the IDs second.
154        my @insertedGenomeList = ();
155      for my $insertedGenome (@{$insertedGenomes}) {      for my $insertedGenome (@{$insertedGenomes}) {
156          my $genomeID = $insertedGenome->[0];          my $genomeID = $insertedGenome->[0];
157          # For a new genome, display the feature count.          # For a new genome, display the feature and BBH counts.
158          my $count = $newSprout->GetCount(['HasFeature'], "HasFeature(from-link) = ?",          my $count = $newSprout->GetCount(['HasFeature'], "HasFeature(from-link) = ?",
159                                           [$genomeID]);                                           [$genomeID]);
160          my $suffix = ($count == 1 ? " one feature" : "$count features");          my $suffix = ($count == 1 ? " one feature" : "$count features");
161          $insertedGenome->[1] .= "($suffix)";          my $bbhCount = FIGRules::BatchBBHs("fig|$genomeID.%", 1e-10);
162      }          $suffix .= "; " . ($bbhCount == 1 ? "one BBH" : "$bbhCount BBHs");
163      # Add information about SEED status to the deleted genomes.          push @insertedGenomeList, [$insertedGenome->[1], "$genomeID ($suffix)"];
164        }
165        # Create a list for the deleted genomes that contains information about SEED status.
166        # This list is flipped, too.
167        my @deletedGenomeList = ();
168      for my $deletedGenome (@{$deletedGenomes}) {      for my $deletedGenome (@{$deletedGenomes}) {
169          my $genomeID = $deletedGenome->[0];          my $genomeID = $deletedGenome->[0];
170            my $suffix = "";
171          if ($fig->is_genome($genomeID)) {          if ($fig->is_genome($genomeID)) {
172                # Here the deleted genome is still in the SEED.
173              my $complete = ($fig->is_complete($genomeID) ? "complete" : "incomplete");              my $complete = ($fig->is_complete($genomeID) ? "complete" : "incomplete");
174              $deletedGenome->[1] .= "(still in SEED, $complete)";              $suffix = " (still in SEED, $complete)";
175            } else {
176                # It's not in the SEED. See if it has been replaced.
177                my ($genus, $species, $strain) = $oldSprout->GetGenomeNameData($genomeID);
178                my @genomeIDs = $newSprout->GetGenomeByNameData($genus, $species, $strain);
179                if (scalar @genomeIDs) {
180                    $suffix = " (replaced)";
181          }          }
182      }      }
183            push @deletedGenomeList, [$deletedGenome->[1], "$genomeID$suffix"];
184        }
185      # Display the lists.      # Display the lists.
186      ShowLists(! $options->{summary},      push @html, ShowLists($cgi, 'New Genomes'     => \@insertedGenomeList,
187                'New Genomes'     => $insertedGenomes,                                  'Deleted Genomes' => \@deletedGenomeList);
               'Deleted Genomes' => $deletedGenomes);  
188      # Now the groups.      # Now the groups.
189      Trace("Comparing groups.") if T(2);      Trace("Comparing groups.") if T(2);
190      my %oldGroups = $oldSprout->GetGroups();      my %oldGroups = $oldSprout->GetGroups();
# Line 121  Line 196 
196          if (! exists $oldGroups{$newGroup}) {          if (! exists $oldGroups{$newGroup}) {
197              # Construct a list of this group's genes.              # Construct a list of this group's genes.
198              my @groupGenomes = NameGenomes($newSprout, $newGroups{$newGroup});              my @groupGenomes = NameGenomes($newSprout, $newGroups{$newGroup});
199              ShowLists(! $options->{summary}, "Genomes in new group $newGroup" => \@groupGenomes);              push @html, ShowLists($cgi, "Genomes in new group $newGroup" => \@groupGenomes);
200          } else {          } else {
201              # Here the group is in both versions. Fix the lists and compare them.              # Here the group is in both versions. Fix the lists and compare them. Note that we'll be comparing
202              my @newGroupList = NameGenomes($newSprout, $newGroups{$newGroup});              # on the genome ID, which will become the second list element after the call to NameGenomes.
203              my @oldGroupList = NameGenomes($oldSprout, $oldGroups{$newGroup});              my @newGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($newSprout, $newGroups{$newGroup});
204                my @oldGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($oldSprout, $oldGroups{$newGroup});
205              Trace("Comparing lists for $newGroup.") if T(4);              Trace("Comparing lists for $newGroup.") if T(4);
206              my ($insertedGroupGenomes, $deletedGroupGenomes) = Tracer::CompareLists(\@newGroupList, \@oldGroupList);              my ($insertedGroupGenomes, $deletedGroupGenomes) = Tracer::CompareLists(\@newGroupList, \@oldGroupList, 1);
207              Trace("Comparison complete.") if T(4);              Trace("Comparison complete.") if T(4);
208              # Delete the old group data. When we're done, this means we'll have a list of the deleted              # Delete the old group data. When we're done, this means the hash
209              # groups.              # will contain only the deleted groups.
210              delete $oldGroups{$newGroup};              delete $oldGroups{$newGroup};
211              # Show the lists. Empty lists will not be shown.              # Show the lists. Empty lists will not be shown.
212              Trace("Displaying group lists.") if T(4);              Trace("Displaying group lists.") if T(4);
213              ShowLists(! $options->{summary},              push @html, ShowLists($cgi, "Genomes new to $newGroup"       => $insertedGroupGenomes,
                       "Genomes new to $newGroup" => $insertedGroupGenomes,  
214                        "Genomes no longer in $newGroup" => $deletedGroupGenomes);                        "Genomes no longer in $newGroup" => $deletedGroupGenomes);
215          }          }
216      }      }
# Line 144  Line 219 
219      for my $oldGroup (sort keys %oldGroups) {      for my $oldGroup (sort keys %oldGroups) {
220          Trace("Processing deleted group $oldGroup.") if T(3);          Trace("Processing deleted group $oldGroup.") if T(3);
221          my @groupGenomes = NameGenomes($oldSprout, $oldGroups{$oldGroup});          my @groupGenomes = NameGenomes($oldSprout, $oldGroups{$oldGroup});
222          ShowLists(! $options->{summary}, "Genomes in deleted group $oldGroup" => \@groupGenomes);          push @html, ShowLists($cgi, "Genomes in deleted group $oldGroup" => \@groupGenomes);
223      }      }
224      # Next, we get the subsystems.      # Next, we get the subsystems.
225      Trace("Processing subsystems.") if T(2);      Trace("Processing subsystems.") if T(2);
# Line 163  Line 238 
238              }              }
239          }          }
240      }      }
241      ShowLists(! $options->{summary},      push @html, ShowLists($cgi, 'New Subsystems'     => $insertedSubs,
               'New Subsystems'     => $insertedSubs,  
242                'Deleted Subsystems' => $deletedSubs);                'Deleted Subsystems' => $deletedSubs);
243      # Now we process the features of the common genes.      # Print what we've done so far.
244      if (! $options->{nofeats}) {      FlushData(\*OUTPUT, \@html);
245          # First we need a hash of the inserted stuff so we know to skip it.      # Now we need to process some statistics that require looping through all the
246        # features in the new sprout. While we're at it, we'll collect the BBH and
247        # coupling counts.
248        my $bbhCount = 0;
249        my $couplingCount = 0;
250        # We'll accumulate a report of genomes with missing BBHs in here.
251        my @bbhMissingGenomes = ();
252        # One of the reports is only for genomes common to both sprouts. To help us
253        # make this determination, we get a hash of the inserted genomes.
254          my %skipGenomes = map { $_->[0] => 1 } @{$insertedGenomes};          my %skipGenomes = map { $_->[0] => 1 } @{$insertedGenomes};
255          # Loop through the genomees.      # Open the organism report file.
256          for my $genome (@newGenomes) {      Open(\*ORGOUT, ">$options->{orgFile}");
257              # Get the ID and name.      # Start the table.
258              my ($genomeID, $genomeName) = @{$genome};      my @orgHtml = ();
259              Trace("Processing $genomeID.") if T(3);      push @orgHtml, $cgi->h4({ align => 'center' }, "Genome Report for $version");
260              # Only process the common genes.      push @orgHtml, $cgi->start_table({ border => 2, align => 'center'});
261        push @orgHtml, $cgi->Tr($cgi->th("Genome"),
262                                $cgi->th({align => 'right'}, ["Size (bp)", "Feats", "Contigs", "Subs",
263                                                              "F In SS", "PEGs", "RNAs", "PPs", "New", "Del"]));
264        FlushData(\*ORGOUT, \@orgHtml);
265        # Now we start the loop. Note that "newGenomes" means all the genomes in the new Sprout,
266        # not the list of genomes that are new!
267        for my $genomeData (@newGenomes) {
268            # Get this genome's ID and name.
269            my $genomeID = $genomeData->[0];
270            # Create a title for it.
271            my $genomeTitle = "$genomeID: $genomeData->[1]";
272            # Compute its size.
273            my $genomeSize = $newSprout->GenomeLength($genomeID);
274            # Get a list of the genomes in the new Sprout that are not this one.
275            my @otherGenomes = grep { $_ ne $genomeID } map { $_->[0] } @newGenomes;
276            Trace("Computing BBH matrix for $genomeID.") if T(3);
277            # Get the bbh matrix going from the current genome to the others.
278            my %matrix = $newSprout->BBHMatrix($genomeID, 1e-20, @otherGenomes);
279            # Set up the subsystem hash. This will be used to track which subsystems are used by
280            # the genome's features.
281            my %subHash = ();
282            # Set up the contig hash. This will be used to track which contigs are used by the
283            # genome's features.
284            my %contigHash = ();
285            # Set up a counter hash for feature types.
286            my %counters = (peg => 0, in_ss => 0, rna => 0, pp => 0, total => 0);
287            # We'll store information about the genome's features (ID and functional role) in here.
288            my @newFeatures = ();
289            # Finally, we'll use this flag to warn us if there are no BBHs.
290            my $bbhsFound = 0;
291            # Loop through the genome's features. The order is important here, because we need
292            # to match the order used by "GetFeatures" for the feature difference comparison.
293            Trace("Processing feature statistics for $genomeID.") if T(3);
294            my $fquery = $newSprout->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
295                                         [$genomeID]);
296            # Loop through the features, updating the counts.
297            while (my $feature = $fquery->Fetch()) {
298                # Update the total feature count.
299                $counters{total}++;
300                Trace("$counters{total} features processed for $genomeID.") if T(3) && ($counters{total} % 500 == 0);
301                # Get the feature ID and role.
302                my $fid = $feature->PrimaryValue('Feature(id)');
303                push @newFeatures, [$fid, $feature->PrimaryValue('Feature(assignment)')];
304                # Check to see if we have BBH data.
305                if (exists $matrix{$fid}) {
306                    my $fidBbhCount = scalar keys %{$matrix{$fid}};
307                    if ($fidBbhCount > 0) {
308                        # Denote that this feature has BBHs.
309                        $bbhsFound = 1;
310                        # Add them to the total BBH count.
311                        $bbhCount += $fidBbhCount;
312                    }
313                }
314                # Ask for couplings.
315                my %coupleHash = $newSprout->CoupledFeatures($fid);
316                $couplingCount += keys %coupleHash;
317                # See if this feature is in a subsystem.
318                my %subs = $newSprout->SubsystemsOf($fid);
319                if (keys %subs) {
320                    $counters{in_ss}++;
321                    for my $sub (keys %subs) {
322                        $subHash{$sub} = 1;
323                    }
324                }
325                # Increment the feature type counter.
326                $counters{$feature->PrimaryValue('Feature(feature-type)')}++;
327                # Insure we've tracked this feature's contigs.
328                my @locations = split /\s*,\s*/, $feature->PrimaryValue('Feature(location-string)');
329                for my $loc (@locations) {
330                    my $locObject = BasicLocation->new($loc);
331                    $contigHash{$locObject->Contig} = 1;
332                }
333            }
334            Trace("Feature data compiled for $genomeID.") if T(3);
335            # The last thing we need to do is compute the number of features added or deleted.
336            # This goes in the genome report, but it's only meaningful for common genomes.
337            my ($addCount, $delCount) = ("","");
338              if (! $skipGenomes{$genomeID}) {              if (! $skipGenomes{$genomeID}) {
339                  # Compare the genome group information.              # Get the old features.
                 # Get the new and old features. This will be very stressful to the machine,  
                 # because there are lots of features.  
340                  my @oldFeatures = GetFeatures($oldSprout, $genomeID);                  my @oldFeatures = GetFeatures($oldSprout, $genomeID);
                 my @newFeatures = GetFeatures($newSprout, $genomeID);  
341                  Trace("Comparing features for $genomeID.") if T(3);                  Trace("Comparing features for $genomeID.") if T(3);
342                  # Compare the lists.                  # Compare the lists.
343                  my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);                  my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);
344                  # Display the lists. Only nonempty lists are displayed; however, we do a check              $addCount = scalar(@{$insertedFeatures});
345                  # first anyway so the trace tells us what's happening.              $delCount = scalar(@{$deletedFeatures});
                 if (scalar @{$insertedFeatures} + scalar @{$deletedFeatures} > 0) {  
                     Trace("Displaying feature differences.") if T(3);  
                     ShowLists(! $options->{summary},  
                               "New Features for $genomeID"      => $insertedFeatures,  
                               "Features Deleted from $genomeID" => $deletedFeatures);  
                 }  
             }  
         }  
346      }      }
347            # Check to see if this genome is missing its BBHs.
348            if (! $bbhsFound) {
349                # It is, so add a line for it to the missing-BBH list.
350                push @bbhMissingGenomes, ShowDatum($cgi, $genomeData->[1], $genomeID);
351            }
352            push @orgHtml, $cgi->Tr($cgi->td($genomeTitle),
353                                    $cgi->td({align => 'right'}, [$genomeSize, $counters{total}, scalar(keys %contigHash),
354                                                                  scalar(keys %subHash), $counters{in_ss}, $counters{peg},
355                                                                  $counters{rna}, $counters{pp}, $addCount, $delCount]));
356            FlushData(\*ORGOUT, \@orgHtml);
357        }
358        # Close the table for the genome report.
359        push @orgHtml, $cgi->end_table();
360        FlushData(\*ORGOUT, \@orgHtml);
361        close ORGOUT;
362        # Check for a missing-BBH report.
363        if (scalar @bbhMissingGenomes) {
364            # There is a report, so put it into the output stream.
365            push @html, ShowTitle($cgi, "Genomes without BBHs");
366            push @html, @bbhMissingGenomes;
367        }
368        # Flush the genome feature comparison data and the missing-BBH report (if any).
369        FlushData(\*OUTPUT, \@html);
370        # Next, we show some basic counts.
371        Trace("Displaying counts.") if T(3);
372        push @html, ShowTitle($cgi, "Statistics for old Sprout");
373        push @html, ShowCounts($cgi, $oldSprout);
374        push @html, ShowTitle($cgi, "Statistics for new Sprout");
375        push @html, ShowCounts($cgi, $newSprout);
376        push @html, ShowDatum($cgi, BBHs => $bbhCount);
377        push @html, ShowDatum($cgi, "Functional Couplings", $couplingCount);
378        FlushData(\*OUTPUT, \@html);
379        # Now we show the genomes that are not in groups but could be. First, we convert
380        # our group hash from the new Sprout into the form used on the web site.
381        Trace("Examining possible missing genomes in groups.") if T(2);
382        my %fixedGroups = $newSprout->Fix(%newGroups);
383        for my $group (sort keys %superTable) {
384            Trace("Checking group $group.");
385            # Loop through this group's genus/species pairs creating filters
386            # for a genome query.
387            my @filters = ();
388            my @filterParms = ();
389            for my $genusSpecies (@{$superTable{$group}->{content}}) {
390                my ($genus, $species) = @{$genusSpecies};
391                # Filter on genus.
392                my $filter = 'Genome(genus) = ?';
393                push @filterParms, $genus;
394                # If necessary, filter on species.
395                if ($species) {
396                    $filter .= ' AND Genome(species) = ?';
397                    push @filterParms, $species;
398                }
399                # Add this filter to the list.
400                push @filters, "($filter)";
401            }
402            # Get all the genomes that should be in the super-group.
403            my @possibles = $newSprout->GetFlat(['Genome'], join(" OR ", @filters),
404                                                \@filterParms, 'Genome(id)');
405            # Get a hash of the genomes already in it.
406            my %inGroup = map { $_ => 1 } @{$fixedGroups{$group}};
407            # Get the possibles that aren't in the group and add identifying information.
408            my @leftOut = NameGenomes($newSprout, [ grep { ! exists $inGroup{$_} } @possibles ]);
409            # If anything survived, show the list.
410            if (@leftOut) {
411                push @html, ShowLists($cgi, "Candidates for $group" => \@leftOut);
412            }
413        }
414        FlushData(\*OUTPUT, \@html);
415        # Close the table.
416        push @html, $cgi->end_table();
417        # Flush the last of the HTML.
418        FlushData(\*OUTPUT, \@html);
419        # Close the output file.
420        close OUTPUT;
421        Trace("Analysis complete.") if T(2);
422  };  };
423  if ($@) {  if ($@) {
424      Trace("Script failed with error: $@") if T(0);      Trace("Script failed with error: $@") if T(0);
# Line 205  Line 428 
428      $rtype = "no error";      $rtype = "no error";
429  }  }
430  if ($options->{phone}) {  if ($options->{phone}) {
431      my $msgID = Tracer::SendSMS($options->{phone}, "Subsystem Checker terminated with $rtype.");      my $msgID = Tracer::SendSMS($options->{phone}, "New Stuff Checker terminated with $rtype.");
432      if ($msgID) {      if ($msgID) {
433          Trace("Phone message sent with ID $msgID.") if T(2);          Trace("Phone message sent with ID $msgID.") if T(2);
434      } else {      } else {
# Line 213  Line 436 
436      }      }
437  }  }
438    
439    =head3 FlushData
440    
441        FlushData($handle, \@lines);
442    
443    Write the specified lines to the output file and clear them out of the list. This
444    method is called periodically so that even if something goes wrong we can still
445    see the data accumulating in the output file. The key aspect here is that we
446    put new-line characters after each line written and show something in the trace
447    log.
448    
449    =over 4
450    
451    =item handle
452    
453    Output handle to which the lines should be written.
454    
455    =item lines
456    
457    Reference to a list of output lines. The output lines will be written to the output
458    handle and then removed from the list.
459    
460    =back
461    
462    =cut
463    
464    sub FlushData {
465        # Get the parameters.
466        my ($handle, $lines) = @_;
467        Trace("Flushing " . scalar(@{$lines}) . " lines to output file.") if T(3);
468        # Write the lines.
469        print $handle join("\n", @{$lines});
470        # Write a terminating new-line.
471        print $handle "\n";
472        # Clear the list.
473        splice @{$lines};
474    }
475    
476  =head3 GetGenomes  =head3 GetGenomes
477    
478  C<< my @geneList = GetGenomes($sprout); >>      my @geneList = GetGenomes($sprout);
479    
480  Return a list of the genomes in the specified Sprout instance. The genomes  Return a list of the genomes in the specified Sprout instance. The genomes
481  are returned in alphabetical order by genome ID.  are returned in alphabetical order by genome ID.
# Line 251  Line 511 
511    
512  =head3 NameGenomes  =head3 NameGenomes
513    
514  C<< my $newList = NameGenomes($sprout, \@genomes); >>      my @newList = NameGenomes($sprout, \@genomes);
515    
516  Convert a list of genome IDs to a list of genome IDs with names.  Convert a list of genome IDs to a list of genome IDs with names.
517    
# Line 278  Line 538 
538      # Get the parameters.      # Get the parameters.
539      my ($sprout, $genomes) = @_;      my ($sprout, $genomes) = @_;
540      # Attach the names.      # Attach the names.
541      my @retVal = map { [$_, $sprout->GenusSpecies($_) ] } @{$genomes};      my @retVal = map { [$sprout->GenusSpecies($_), $_ ] } @{$genomes};
542      # Return the result.      # Return the result.
543      return @retVal;      return @retVal;
544  }  }
545    
546  =head3 GetSubsystems  =head3 GetSubsystems
547    
548  C<< my @subsystems = GetSubsystems($sprout); >>      my @subsystems = GetSubsystems($sprout);
549    
550  Get a list of the subsystems in the specified Sprout instance.  Get a list of the subsystems in the specified Sprout instance.
551    
# Line 314  Line 574 
574      return @retVal;      return @retVal;
575  }  }
576    
577    =head3 GetProperties
578    
579        my @propertyList = GetProperties($sprout);
580    
581    Return a list of properties. Each element in the list will be a 2-tuple containing
582    the property name and value in the first column and its ID in the second column.
583    
584    =over 4
585    
586    =item sprout
587    
588    Sprout instance to be used to retrieve the properties.
589    
590    =item RETURN
591    
592    Returns a list of 2-tuples. The first element in each 2-tuple will be a string
593    in the form of an assignment of the property value to the property name. The second
594    element will be the number of features possessing the property. The list will be
595    sorted in ascending alphabetical order.
596    
597    =back
598    
599    =cut
600    
601    sub GetProperties {
602        # Get the parameters.
603        my ($sprout) = @_;
604        # Get the properties.
605        my @props = $sprout->GetAll(['Property'],
606                                    "ORDER BY Property(property-name), Property(property-value)", [],
607                                    ['Property(property-name)', 'Property(property-value)', 'Property(id)']);
608        # Combine the property names and values and replace each property ID by a feature count.
609        my @retVal;
610        for my $propItem (@props) {
611            # Split up the value on punctuation boundaries for readability.
612            my $propValue = $propItem->[1];
613            $propValue =~ s/::/ :: /g;
614            $propValue =~ s/([,;])(\S)/$1 $2/g;
615            my $label = $propItem->[0] . " = " . $propValue;
616            my $count = $sprout->GetCount(['Feature', 'HasProperty'], "HasProperty(to-link) = ?",
617                                          [$propItem->[2]]);
618            push @retVal, [$label, $count];
619        }
620        # Return the result.
621        return @retVal;
622    }
623    
624  =head3 GetFeatures  =head3 GetFeatures
625    
626  C<< my @features = GetFeatures($sprout, $genomeID); >>      my @features = GetFeatures($sprout, $genomeID);
627    
628  Return the features of the specified genome in the specified Sprout instance.  Return the features of the specified genome in the specified Sprout instance.
629    
# Line 352  Line 659 
659    
660  =head3 ShowLists  =head3 ShowLists
661    
662  C<< ShowLists($all, %lists); >>      my @htmlLines = ShowLists($cgi, %lists);
663    
664  Display a set of lists. Each list should consist of 2-tuples.  Display a set of lists. Each list should consist of 2-tuples, and the list
665    entries will be displayed as 2-element table rows with a header row.
666    
667  =over 4  =over 4
668    
669  =item all  =item cgi
670    
671  TRUE if details should be displayed; FALSE if only summaries should be displayed.  A CGI query object containing the options for this program. It is also used to format
672    HTML.
673    
674  =item lists  =item lists
675    
676  A hash mapping list names to list references.  A hash mapping list names to list references.
677    
678    =item RETURN
679    
680    Returns a list of HTML lines displaying the list in tabular form.
681    
682    =back
683    
684  =cut  =cut
685    
686  sub ShowLists {  sub ShowLists {
687      # Get the parameters.      # Get the parameters.
688      my $all = shift @_;      my $cgi = shift @_;
689      my %lists = @_;      my %lists = @_;
690        # Declare the return variable. The HTML lines will be accumulated
691        # in here and then joined with new-lines.
692        my @retVal = ();
693      # Loop through the lists in alphabetical order by list name.      # Loop through the lists in alphabetical order by list name.
694      for my $listName (sort keys %lists) {      for my $listName (sort keys %lists) {
695          # Get the list itself.          # Get the list itself.
# Line 380  Line 698 
698          my $listSize = scalar @{$list};          my $listSize = scalar @{$list};
699          # Only proceed if the list is nonempty.          # Only proceed if the list is nonempty.
700          if ($listSize > 0) {          if ($listSize > 0) {
701              my $header = ShowHeader($listName, $listSize);              my $header = ComputeHeader($listName, $listSize);
             print "$header\n";  
702              Trace($header) if T(3);              Trace($header) if T(3);
703              # If we're at trace level 3, display the list.              # Display the header line as a header.
704              if ($all) {              push @retVal, ShowTitle($cgi, $header);
705                  # Put a spacer under the title.              # Now display the list as table rows. Note we convert underbars to spaces
706                  print "\n";              # in the name row to make the table easier to fit into narrow places.
                 # Get the width of the name column.  
                 my $width = 0;  
                 for my $entryLen (map { length $_->[0] } @{$list}) {  
                     $width = $entryLen if $entryLen > $width;  
                 }  
                 # Now display the list.  
707                  for my $entry (@{$list}) {                  for my $entry (@{$list}) {
708                      my ($name, $data) = @{$entry};                      my ($name, $data) = @{$entry};
709                      print "    $name" . (" " x ($width - length $name)) . " $data\n";                  $name =~ tr/_/ /;
710                  }                  push @retVal, ShowDatum($cgi, $name => $data);
                 print "\n\n";  
711              }              }
712          }          }
713      }      }
714        # Return the list of HTML lines.
715        return @retVal;
716  }  }
717    
718  =head3 ShowHeader  =head3 ComputeHeader
719    
720  C<< my $header = ShowHeader($name, $count); >>      my $header = ComputeHeader($name, $count);
721    
722  Return a list header for a list of the specified length.  Return a list header for a list of the specified length.
723    
# Line 427  Line 739 
739    
740  =cut  =cut
741    
742  sub ShowHeader {  sub ComputeHeader {
743      # Get the parameters.      # Get the parameters.
744      my ($name, $count) = @_;      my ($name, $count) = @_;
745      # Declare the return variable.      # Declare the return variable.
746      my $retVal;      my $retVal;
747      if ($count == 0) {      if ($count == 0) {
748          $retVal = "*** $name: none";          $retVal = "$name: none";
749      } elsif ($count == 1) {      } elsif ($count == 1) {
750          $retVal = "*** $name: one";          $retVal = "$name: one";
751      } else {      } else {
752          $retVal = "*** $name: $count";          $retVal = "$name: $count";
753        }
754        # Return the result.
755        return $retVal;
756    }
757    
758    =head3 ShowCounts
759    
760        ShowCounts($sprout);
761    
762    Display general counts for the specified sprout instance. These counts are
763    used in progress reports.
764    
765    =over 4
766    
767    =item cgi
768    
769    CGI query object used to format the output.
770    
771    =item sprout
772    
773    Sprout instance for which counts are to be produced.
774    
775    =item RETURN
776    
777    Returns a list of HTML lines with the counts arranged in table rows.
778    
779    =back
780    
781    =cut
782    
783    sub ShowCounts {
784        # Get the parameters.
785        my ($cgi, $sprout) = @_;
786        # Count genomes and subsystems.
787        my $genomes = $sprout->GetCount(['Genome']);
788        my $subsystems = $sprout->GetCount(['Subsystem']);
789        # Count roles and external functional assignments.
790        my $roles = $sprout->GetCount(['OccursInSubsystem']);
791        my $funcs = $sprout->GetCount(['ExternalAliasFunc']);
792        # Count features.
793        my $features = $sprout->GetCount(['Feature']);
794        # Display the counts.
795        my @retVal = ();
796        push @retVal, ShowDatum($cgi, Genomes => $genomes);
797        push @retVal, ShowDatum($cgi, Subsystems => $subsystems);
798        push @retVal, ShowDatum($cgi, Roles => $roles);
799        push @retVal, ShowDatum($cgi, 'External function assignments', $funcs);
800        push @retVal, ShowDatum($cgi, Features => $features);
801        # Return the html.
802        return @retVal;
803    }
804    
805    =head3 ShowDatum
806    
807        my $htmlText = ShowDatum($cgi, $label, $value);
808    
809    Return a table row displaying the specified label and value.
810    
811    =over 4
812    
813    =item cgi
814    
815    CGI query object used to generate the HTML text.
816    
817    =item label
818    
819    Label to appear in the left cell of the table row.
820    
821    =item value
822    
823    Value to appear in the right cell of the table row.
824    
825    =item RETURN
826    
827    Returns the HTML for a single table row with the last cell right-aligned.
828    
829    =back
830    
831    =cut
832    
833    sub ShowDatum {
834        # Get the parameters.
835        my ($cgi, $label, $value) = @_;
836        # Create the table row.
837        my $retVal = $cgi->Tr($cgi->td($label), $cgi->td({align => 'right'}, $value));
838        # Return it.
839        return $retVal;
840      }      }
841    
842    =head3 ShowTitle
843    
844        my $html = ShowTitle($cgi, $title);
845    
846    Display a title line. This will be a merged table row with bolded text.
847    
848    =over 4
849    
850    =item cgi
851    
852    CGI query object used to generate HTML output.
853    
854    =item RETURN
855    
856    Returns the HTML text.
857    
858    =back
859    
860    =cut
861    
862    sub ShowTitle {
863        # Get the parameters.
864        my ($cgi, $title) = @_;
865        # Declare the return variable.
866        my $retVal = $cgi->Tr($cgi->th({colspan => 2, align => "center"}, $title));
867      # Return the result.      # Return the result.
868      return $retVal;      return $retVal;
869  }  }

Legend:
Removed from v.1.10  
changed lines
  Added in v.1.26

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3