[Bio] / Sprout / NewStuffCheck.pl Repository:
ViewVC logotype

Diff of /Sprout/NewStuffCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.7, Thu Aug 24 17:56:21 2006 UTC revision 1.26, Tue Feb 5 05:15:42 2008 UTC
# Line 3  Line 3 
3  =head1 New Stuff Checker  =head1 New Stuff Checker
4    
5  This script compares the genomes, features, and annotations in  This script compares the genomes, features, and annotations in
6  the old and new sprouts and lists the differences.  the old and new sprouts and lists the differences and produces
7    a report in HTML. The output is an HTML fragment, not an entire
8    web page. This is because we expect it to be included in another
9    page.
10    
11  The currently-supported command-line options are as follows.  The currently-supported command-line options for NewStuffCheck are as follows.
12    
13  =over 4  =over 4
14    
 =item summary  
   
 Do not display details, only difference summaries.  
   
15  =item user  =item user
16    
17  Name suffix to be used for log files. If omitted, the PID is used.  Name suffix to be used for log files. If omitted, the PID is used.
# Line 43  Line 42 
42    
43  Phone number to message when the script is complete.  Phone number to message when the script is complete.
44    
45    =item groupFile
46    
47    Name of the group file (described below). The default is C<groups.tbl>
48    in the Sprout data directory.
49    
50    =item outFile
51    
52    Output file name. The default is C<html/includes/diff.inc> in the
53    nmpdr C<next> directory.
54    
55    =item orgFile
56    
57    Output file for the genome report. The default is C<html/includes/genomes.inc> in
58    the nmpdr C<next> directory.
59    
60    =back
61    
62    =head2 The Group File
63    
64    A key data file for this process is C<groups.tbl>. This file is kept in the
65    Sprout Data directory, and contains the following columns:
66    
67    =over 4
68    
69    =item name
70    
71    Name of the group.
72    
73    =item page
74    
75    Name of the group's page on the web site (e.g. C<campy.php> for
76    Campylobacter)
77    
78    =item genus
79    
80    Genus of the group
81    
82    =item species
83    
84    Species of the group, or an empty string if the group is for an entire
85    genus. If the group contains more than one species, the species names
86    should be separated by commas.
87    
88  =back  =back
89    
90  =cut  =cut
91    
92  use strict;  use strict;
93  use Tracer;  use Tracer;
 use DocUtils;  
 use TestUtils;  
94  use Cwd;  use Cwd;
95  use File::Copy;  use File::Copy;
96  use File::Path;  use File::Path;
97  use FIG;  use FIG;
98  use SFXlate;  use SFXlate;
99  use Sprout;  use Sprout;
100    use CGI;
101    use FIGRules;
102    
103  # Get the command-line options and parameters.  # Get the command-line options and parameters.
104  my ($options, @parameters) = StandardSetup([qw(Sprout) ],  my ($options, @parameters) = StandardSetup([qw(Sprout) ],
105                                             {                                             {
106                                                trace => ["2-", "tracing level; use a minus to prevent tracing to standard output"],                                                groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"],
107                                                summary => ["", "if specified, detailed lists of the different items will not be displayed"],                                                trace => ["2", "tracing level; use a minus to prevent tracing to standard output"],
108                                                phone => ["", "phone number (international format) to call when load finishes"],                                                phone => ["", "phone number (international format) to call when load finishes"],
109                                                  outFile => ["$FIG_Config::nmpdr_base/next/html/includes/diff.inc", "output file for the difference report"],
110                                                  orgFile => ["$FIG_Config::nmpdr_base/next/html/includes/genomes.inc", "output file for the genome report"],
111                                             },                                             },
112                                             "",                                             "",
113                                             @ARGV);                                             @ARGV);
# Line 71  Line 115 
115  my $rtype;  my $rtype;
116  # Insure we catch errors.  # Insure we catch errors.
117  eval {  eval {
118        # Get a CGI object for building the output. We pass it the options hash so
119        # the formatting subroutines have access to it. Also, we want it to know
120        # we're not a real web script.
121        my $cgi = CGI->new($options);
122        # Start accumulating HTML data.
123        my @html = ();
124        # Open the output file. We do this early in case there's a problem.
125        my $outFileName = $options->{outFile};
126        Trace("Opening output file $outFileName.") if T(2);
127        Open(\*OUTPUT, ">$outFileName");
128        # Get a nice-looking version name and make it into a title.
129        my $version = uc $FIG_Config::nmpdr_version;
130        $version =~ tr/_/ /;
131        push @html, $cgi->h4({align => "center"}, "Difference Report for $version");
132        # Start the table.
133        push @html, $cgi->start_table({align => "center", border => "2"});
134        # Get the group file.
135        my $groupFileName = $options->{groupFile};
136        Trace("Reading group file $groupFileName.") if T(2);
137      Trace("Processing genomes.") if T(2);      Trace("Processing genomes.") if T(2);
138        # Get the current SEED.
139        my $fig = FIG->new();
140      # Get the old Sprout.      # Get the old Sprout.
141      my $oldSprout = SFXlate->new_sprout_only($FIG_Config::oldSproutDB);      my $oldSprout = SFXlate->old_sprout_only();
142      # Get its genomes in alphabetical order.      # Get its genomes in alphabetical order.
143      my @oldGenomes = GetGenes($oldSprout);      my @oldGenomes = GetGenomes($oldSprout);
144      # Get the new Sprout.      # Get the new Sprout.
145      my $newSprout = SFXlate->new_sprout_only();      my $newSprout = SFXlate->new_sprout_only();
146      # Get its genomes in alphabetical order.      # Get its genomes in alphabetical order.
147      my @newGenomes = GetGenes($newSprout);      my @newGenomes = GetGenomes($newSprout);
148      # Compare the two genomes lists.      # Compare the two genomes lists.
149      my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes);      my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes);
150      # Add feature counts to the new genomes.      # Get the super-group data.
151        my %superTable = $newSprout->CheckGroupFile();
152        # Create a list for the new genomes that includes BBH and feature counts. We'll flip this
153        # lists so that the genome names are first and the IDs second.
154        my @insertedGenomeList = ();
155      for my $insertedGenome (@{$insertedGenomes}) {      for my $insertedGenome (@{$insertedGenomes}) {
156          my $genomeID = $insertedGenome->[0];          my $genomeID = $insertedGenome->[0];
157          # For a new genome, display the feature count.          # For a new genome, display the feature and BBH counts.
158          my $count = $newSprout->GetCount(['HasFeature'], "HasFeature(from-link) = ?",          my $count = $newSprout->GetCount(['HasFeature'], "HasFeature(from-link) = ?",
159                                           [$genomeID]);                                           [$genomeID]);
160          my $suffix = ($count == 1 ? " one feature" : "$count features");          my $suffix = ($count == 1 ? " one feature" : "$count features");
161          $insertedGenome->[1] .= "($suffix)";          my $bbhCount = FIGRules::BatchBBHs("fig|$genomeID.%", 1e-10);
162            $suffix .= "; " . ($bbhCount == 1 ? "one BBH" : "$bbhCount BBHs");
163            push @insertedGenomeList, [$insertedGenome->[1], "$genomeID ($suffix)"];
164        }
165        # Create a list for the deleted genomes that contains information about SEED status.
166        # This list is flipped, too.
167        my @deletedGenomeList = ();
168        for my $deletedGenome (@{$deletedGenomes}) {
169            my $genomeID = $deletedGenome->[0];
170            my $suffix = "";
171            if ($fig->is_genome($genomeID)) {
172                # Here the deleted genome is still in the SEED.
173                my $complete = ($fig->is_complete($genomeID) ? "complete" : "incomplete");
174                $suffix = " (still in SEED, $complete)";
175            } else {
176                # It's not in the SEED. See if it has been replaced.
177                my ($genus, $species, $strain) = $oldSprout->GetGenomeNameData($genomeID);
178                my @genomeIDs = $newSprout->GetGenomeByNameData($genus, $species, $strain);
179                if (scalar @genomeIDs) {
180                    $suffix = " (replaced)";
181                }
182            }
183            push @deletedGenomeList, [$deletedGenome->[1], "$genomeID$suffix"];
184      }      }
185      # Display the lists.      # Display the lists.
186      ShowLists(! $options->{summary},      push @html, ShowLists($cgi, 'New Genomes'     => \@insertedGenomeList,
187                'New Genomes'     => $insertedGenomes,                                  'Deleted Genomes' => \@deletedGenomeList);
188                'Deleted Genomes' => $deletedGenomes);      # Now the groups.
189        Trace("Comparing groups.") if T(2);
190        my %oldGroups = $oldSprout->GetGroups();
191        my %newGroups = $newSprout->GetGroups();
192        # Loop through the new groups.
193        for my $newGroup (sort keys %newGroups) {
194            Trace("Processing group $newGroup.") if T(3);
195            # Find out if this group is new to this version.
196            if (! exists $oldGroups{$newGroup}) {
197                # Construct a list of this group's genes.
198                my @groupGenomes = NameGenomes($newSprout, $newGroups{$newGroup});
199                push @html, ShowLists($cgi, "Genomes in new group $newGroup" => \@groupGenomes);
200            } else {
201                # Here the group is in both versions. Fix the lists and compare them. Note that we'll be comparing
202                # on the genome ID, which will become the second list element after the call to NameGenomes.
203                my @newGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($newSprout, $newGroups{$newGroup});
204                my @oldGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($oldSprout, $oldGroups{$newGroup});
205                Trace("Comparing lists for $newGroup.") if T(4);
206                my ($insertedGroupGenomes, $deletedGroupGenomes) = Tracer::CompareLists(\@newGroupList, \@oldGroupList, 1);
207                Trace("Comparison complete.") if T(4);
208                # Delete the old group data. When we're done, this means the hash
209                # will contain only the deleted groups.
210                delete $oldGroups{$newGroup};
211                # Show the lists. Empty lists will not be shown.
212                Trace("Displaying group lists.") if T(4);
213                push @html, ShowLists($cgi, "Genomes new to $newGroup"       => $insertedGroupGenomes,
214                                            "Genomes no longer in $newGroup" => $deletedGroupGenomes);
215            }
216        }
217        Trace("Processing deleted groups.") if T(4);
218        # Now list the deleted groups.
219        for my $oldGroup (sort keys %oldGroups) {
220            Trace("Processing deleted group $oldGroup.") if T(3);
221            my @groupGenomes = NameGenomes($oldSprout, $oldGroups{$oldGroup});
222            push @html, ShowLists($cgi, "Genomes in deleted group $oldGroup" => \@groupGenomes);
223        }
224      # Next, we get the subsystems.      # Next, we get the subsystems.
225      Trace("Processing subsystems.") if T(2);      Trace("Processing subsystems.") if T(2);
226      my @oldSubsystems = GetSubsystems($oldSprout);      my @oldSubsystems = GetSubsystems($oldSprout);
227      my @newSubsystems = GetSubsystems($newSprout);      my @newSubsystems = GetSubsystems($newSprout);
228      # Compare and display the subsystem lists.      # Compare and display the subsystem lists.
229      my ($insertedSubs, $deletedSubs) = Tracer::CompareLists(\@newSubsystems, \@oldSubsystems);      my ($insertedSubs, $deletedSubs) = Tracer::CompareLists(\@newSubsystems, \@oldSubsystems);
230      ShowLists(! $options->{summary},      # Check the deleted subsystems to see if they're in SEED.
231                'New Subsystems'     => $insertedSubs,      if (scalar @{$deletedSubs} > 0) {
232            my %subChecker = map { $_ => 1 } $fig->all_subsystems();
233            for my $deletedSub (@{$deletedSubs}) {
234                my $subID = $deletedSub->[0];
235                if ($subChecker{$subID}) {
236                    my $trusted = ($fig->usable_subsystem($subID) ? "usable" : "not usable");
237                    $deletedSub->[1] .= " (still in SEED, $trusted)";
238                }
239            }
240        }
241        push @html, ShowLists($cgi, 'New Subsystems'     => $insertedSubs,
242                'Deleted Subsystems' => $deletedSubs);                'Deleted Subsystems' => $deletedSubs);
243      # Now we process the features of the common genes. First we need a hash      # Print what we've done so far.
244      # of the inserted stuff so we know to skip it.      FlushData(\*OUTPUT, \@html);
245      my %skipGenes = map { $_->[0] => 1 } @{$insertedGenomes};      # Now we need to process some statistics that require looping through all the
246      # Loop through the genomees.      # features in the new sprout. While we're at it, we'll collect the BBH and
247      for my $genome (@newGenomes) {      # coupling counts.
248          # Get the ID and name.      my $bbhCount = 0;
249          my ($genomeID, $genomeName) = @{$genome};      my $couplingCount = 0;
250          Trace("Processing $genomeID.") if T(3);      # We'll accumulate a report of genomes with missing BBHs in here.
251          # Only process the common genes.      my @bbhMissingGenomes = ();
252          if (! $skipGenes{$genomeID}) {      # One of the reports is only for genomes common to both sprouts. To help us
253              # Get the new and old features. This will be very stressful to the machine,      # make this determination, we get a hash of the inserted genomes.
254              # because there are lots of features.      my %skipGenomes = map { $_->[0] => 1 } @{$insertedGenomes};
255        # Open the organism report file.
256        Open(\*ORGOUT, ">$options->{orgFile}");
257        # Start the table.
258        my @orgHtml = ();
259        push @orgHtml, $cgi->h4({ align => 'center' }, "Genome Report for $version");
260        push @orgHtml, $cgi->start_table({ border => 2, align => 'center'});
261        push @orgHtml, $cgi->Tr($cgi->th("Genome"),
262                                $cgi->th({align => 'right'}, ["Size (bp)", "Feats", "Contigs", "Subs",
263                                                              "F In SS", "PEGs", "RNAs", "PPs", "New", "Del"]));
264        FlushData(\*ORGOUT, \@orgHtml);
265        # Now we start the loop. Note that "newGenomes" means all the genomes in the new Sprout,
266        # not the list of genomes that are new!
267        for my $genomeData (@newGenomes) {
268            # Get this genome's ID and name.
269            my $genomeID = $genomeData->[0];
270            # Create a title for it.
271            my $genomeTitle = "$genomeID: $genomeData->[1]";
272            # Compute its size.
273            my $genomeSize = $newSprout->GenomeLength($genomeID);
274            # Get a list of the genomes in the new Sprout that are not this one.
275            my @otherGenomes = grep { $_ ne $genomeID } map { $_->[0] } @newGenomes;
276            Trace("Computing BBH matrix for $genomeID.") if T(3);
277            # Get the bbh matrix going from the current genome to the others.
278            my %matrix = $newSprout->BBHMatrix($genomeID, 1e-20, @otherGenomes);
279            # Set up the subsystem hash. This will be used to track which subsystems are used by
280            # the genome's features.
281            my %subHash = ();
282            # Set up the contig hash. This will be used to track which contigs are used by the
283            # genome's features.
284            my %contigHash = ();
285            # Set up a counter hash for feature types.
286            my %counters = (peg => 0, in_ss => 0, rna => 0, pp => 0, total => 0);
287            # We'll store information about the genome's features (ID and functional role) in here.
288            my @newFeatures = ();
289            # Finally, we'll use this flag to warn us if there are no BBHs.
290            my $bbhsFound = 0;
291            # Loop through the genome's features. The order is important here, because we need
292            # to match the order used by "GetFeatures" for the feature difference comparison.
293            Trace("Processing feature statistics for $genomeID.") if T(3);
294            my $fquery = $newSprout->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
295                                         [$genomeID]);
296            # Loop through the features, updating the counts.
297            while (my $feature = $fquery->Fetch()) {
298                # Update the total feature count.
299                $counters{total}++;
300                Trace("$counters{total} features processed for $genomeID.") if T(3) && ($counters{total} % 500 == 0);
301                # Get the feature ID and role.
302                my $fid = $feature->PrimaryValue('Feature(id)');
303                push @newFeatures, [$fid, $feature->PrimaryValue('Feature(assignment)')];
304                # Check to see if we have BBH data.
305                if (exists $matrix{$fid}) {
306                    my $fidBbhCount = scalar keys %{$matrix{$fid}};
307                    if ($fidBbhCount > 0) {
308                        # Denote that this feature has BBHs.
309                        $bbhsFound = 1;
310                        # Add them to the total BBH count.
311                        $bbhCount += $fidBbhCount;
312                    }
313                }
314                # Ask for couplings.
315                my %coupleHash = $newSprout->CoupledFeatures($fid);
316                $couplingCount += keys %coupleHash;
317                # See if this feature is in a subsystem.
318                my %subs = $newSprout->SubsystemsOf($fid);
319                if (keys %subs) {
320                    $counters{in_ss}++;
321                    for my $sub (keys %subs) {
322                        $subHash{$sub} = 1;
323                    }
324                }
325                # Increment the feature type counter.
326                $counters{$feature->PrimaryValue('Feature(feature-type)')}++;
327                # Insure we've tracked this feature's contigs.
328                my @locations = split /\s*,\s*/, $feature->PrimaryValue('Feature(location-string)');
329                for my $loc (@locations) {
330                    my $locObject = BasicLocation->new($loc);
331                    $contigHash{$locObject->Contig} = 1;
332                }
333            }
334            Trace("Feature data compiled for $genomeID.") if T(3);
335            # The last thing we need to do is compute the number of features added or deleted.
336            # This goes in the genome report, but it's only meaningful for common genomes.
337            my ($addCount, $delCount) = ("","");
338            if (! $skipGenomes{$genomeID}) {
339                # Get the old features.
340              my @oldFeatures = GetFeatures($oldSprout, $genomeID);              my @oldFeatures = GetFeatures($oldSprout, $genomeID);
             my @newFeatures = GetFeatures($newSprout, $genomeID);  
341              Trace("Comparing features for $genomeID.") if T(3);              Trace("Comparing features for $genomeID.") if T(3);
342              # Compare the lists.              # Compare the lists.
343              my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);              my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);
344              # If either list has data, we want to display it.              $addCount = scalar(@{$insertedFeatures});
345              if (scalar @{$insertedFeatures} + scalar @{$deletedFeatures} > 0) {              $delCount = scalar(@{$deletedFeatures});
                 Trace("Displaying feature differences.") if T(3);  
                 ShowLists(! $options->{summary},  
                           "New Features for $genomeID"      => $insertedFeatures,  
                           "Features Deleted from $genomeID" => $deletedFeatures);  
             }  
         }  
346      }      }
347            # Check to see if this genome is missing its BBHs.
348            if (! $bbhsFound) {
349                # It is, so add a line for it to the missing-BBH list.
350                push @bbhMissingGenomes, ShowDatum($cgi, $genomeData->[1], $genomeID);
351            }
352            push @orgHtml, $cgi->Tr($cgi->td($genomeTitle),
353                                    $cgi->td({align => 'right'}, [$genomeSize, $counters{total}, scalar(keys %contigHash),
354                                                                  scalar(keys %subHash), $counters{in_ss}, $counters{peg},
355                                                                  $counters{rna}, $counters{pp}, $addCount, $delCount]));
356            FlushData(\*ORGOUT, \@orgHtml);
357        }
358        # Close the table for the genome report.
359        push @orgHtml, $cgi->end_table();
360        FlushData(\*ORGOUT, \@orgHtml);
361        close ORGOUT;
362        # Check for a missing-BBH report.
363        if (scalar @bbhMissingGenomes) {
364            # There is a report, so put it into the output stream.
365            push @html, ShowTitle($cgi, "Genomes without BBHs");
366            push @html, @bbhMissingGenomes;
367        }
368        # Flush the genome feature comparison data and the missing-BBH report (if any).
369        FlushData(\*OUTPUT, \@html);
370        # Next, we show some basic counts.
371        Trace("Displaying counts.") if T(3);
372        push @html, ShowTitle($cgi, "Statistics for old Sprout");
373        push @html, ShowCounts($cgi, $oldSprout);
374        push @html, ShowTitle($cgi, "Statistics for new Sprout");
375        push @html, ShowCounts($cgi, $newSprout);
376        push @html, ShowDatum($cgi, BBHs => $bbhCount);
377        push @html, ShowDatum($cgi, "Functional Couplings", $couplingCount);
378        FlushData(\*OUTPUT, \@html);
379        # Now we show the genomes that are not in groups but could be. First, we convert
380        # our group hash from the new Sprout into the form used on the web site.
381        Trace("Examining possible missing genomes in groups.") if T(2);
382        my %fixedGroups = $newSprout->Fix(%newGroups);
383        for my $group (sort keys %superTable) {
384            Trace("Checking group $group.");
385            # Loop through this group's genus/species pairs creating filters
386            # for a genome query.
387            my @filters = ();
388            my @filterParms = ();
389            for my $genusSpecies (@{$superTable{$group}->{content}}) {
390                my ($genus, $species) = @{$genusSpecies};
391                # Filter on genus.
392                my $filter = 'Genome(genus) = ?';
393                push @filterParms, $genus;
394                # If necessary, filter on species.
395                if ($species) {
396                    $filter .= ' AND Genome(species) = ?';
397                    push @filterParms, $species;
398                }
399                # Add this filter to the list.
400                push @filters, "($filter)";
401            }
402            # Get all the genomes that should be in the super-group.
403            my @possibles = $newSprout->GetFlat(['Genome'], join(" OR ", @filters),
404                                                \@filterParms, 'Genome(id)');
405            # Get a hash of the genomes already in it.
406            my %inGroup = map { $_ => 1 } @{$fixedGroups{$group}};
407            # Get the possibles that aren't in the group and add identifying information.
408            my @leftOut = NameGenomes($newSprout, [ grep { ! exists $inGroup{$_} } @possibles ]);
409            # If anything survived, show the list.
410            if (@leftOut) {
411                push @html, ShowLists($cgi, "Candidates for $group" => \@leftOut);
412            }
413        }
414        FlushData(\*OUTPUT, \@html);
415        # Close the table.
416        push @html, $cgi->end_table();
417        # Flush the last of the HTML.
418        FlushData(\*OUTPUT, \@html);
419        # Close the output file.
420        close OUTPUT;
421        Trace("Analysis complete.") if T(2);
422  };  };
423  if ($@) {  if ($@) {
424      Trace("Script failed with error: $@") if T(0);      Trace("Script failed with error: $@") if T(0);
# Line 139  Line 428 
428      $rtype = "no error";      $rtype = "no error";
429  }  }
430  if ($options->{phone}) {  if ($options->{phone}) {
431      my $msgID = Tracer::SendSMS($options->{phone}, "Subsystem Checker terminated with $rtype.");      my $msgID = Tracer::SendSMS($options->{phone}, "New Stuff Checker terminated with $rtype.");
432      if ($msgID) {      if ($msgID) {
433          Trace("Phone message sent with ID $msgID.") if T(2);          Trace("Phone message sent with ID $msgID.") if T(2);
434      } else {      } else {
# Line 147  Line 436 
436      }      }
437  }  }
438    
439  =head3 GetGenes  =head3 FlushData
440    
441        FlushData($handle, \@lines);
442    
443    Write the specified lines to the output file and clear them out of the list. This
444    method is called periodically so that even if something goes wrong we can still
445    see the data accumulating in the output file. The key aspect here is that we
446    put new-line characters after each line written and show something in the trace
447    log.
448    
449    =over 4
450    
451    =item handle
452    
453    Output handle to which the lines should be written.
454    
455  C<< my @geneList = GetGenes($sprout); >>  =item lines
456    
457    Reference to a list of output lines. The output lines will be written to the output
458    handle and then removed from the list.
459    
460    =back
461    
462    =cut
463    
464    sub FlushData {
465        # Get the parameters.
466        my ($handle, $lines) = @_;
467        Trace("Flushing " . scalar(@{$lines}) . " lines to output file.") if T(3);
468        # Write the lines.
469        print $handle join("\n", @{$lines});
470        # Write a terminating new-line.
471        print $handle "\n";
472        # Clear the list.
473        splice @{$lines};
474    }
475    
476    =head3 GetGenomes
477    
478        my @geneList = GetGenomes($sprout);
479    
480  Return a list of the genomes in the specified Sprout instance. The genomes  Return a list of the genomes in the specified Sprout instance. The genomes
481  are returned in alphabetical order by genome ID.  are returned in alphabetical order by genome ID.
# Line 169  Line 495 
495    
496  =cut  =cut
497    
498  sub GetGenes {  sub GetGenomes {
499      # Get the parameters.      # Get the parameters.
500      my ($sprout) = @_;      my ($sprout) = @_;
501      # Get the desired data.      # Get the desired data.
# Line 183  Line 509 
509      return @retVal;      return @retVal;
510  }  }
511    
512    =head3 NameGenomes
513    
514        my @newList = NameGenomes($sprout, \@genomes);
515    
516    Convert a list of genome IDs to a list of genome IDs with names.
517    
518    =over 4
519    
520    =item sprout
521    
522    The relevant sprout instance.
523    
524    =item genomes
525    
526    Reference to a list of genome IDs
527    
528    =item RETURN
529    
530    Returns a list of 2-tuples, each tuple consisting of a genome ID followed by a
531    genome name.
532    
533    =back
534    
535    =cut
536    
537    sub NameGenomes {
538        # Get the parameters.
539        my ($sprout, $genomes) = @_;
540        # Attach the names.
541        my @retVal = map { [$sprout->GenusSpecies($_), $_ ] } @{$genomes};
542        # Return the result.
543        return @retVal;
544    }
545    
546  =head3 GetSubsystems  =head3 GetSubsystems
547    
548  C<< my @subsystems = GetSubsystems($sprout); >>      my @subsystems = GetSubsystems($sprout);
549    
550  Get a list of the subsystems in the specified Sprout instance.  Get a list of the subsystems in the specified Sprout instance.
551    
# Line 214  Line 574 
574      return @retVal;      return @retVal;
575  }  }
576    
577    =head3 GetProperties
578    
579        my @propertyList = GetProperties($sprout);
580    
581    Return a list of properties. Each element in the list will be a 2-tuple containing
582    the property name and value in the first column and its ID in the second column.
583    
584    =over 4
585    
586    =item sprout
587    
588    Sprout instance to be used to retrieve the properties.
589    
590    =item RETURN
591    
592    Returns a list of 2-tuples. The first element in each 2-tuple will be a string
593    in the form of an assignment of the property value to the property name. The second
594    element will be the number of features possessing the property. The list will be
595    sorted in ascending alphabetical order.
596    
597    =back
598    
599    =cut
600    
601    sub GetProperties {
602        # Get the parameters.
603        my ($sprout) = @_;
604        # Get the properties.
605        my @props = $sprout->GetAll(['Property'],
606                                    "ORDER BY Property(property-name), Property(property-value)", [],
607                                    ['Property(property-name)', 'Property(property-value)', 'Property(id)']);
608        # Combine the property names and values and replace each property ID by a feature count.
609        my @retVal;
610        for my $propItem (@props) {
611            # Split up the value on punctuation boundaries for readability.
612            my $propValue = $propItem->[1];
613            $propValue =~ s/::/ :: /g;
614            $propValue =~ s/([,;])(\S)/$1 $2/g;
615            my $label = $propItem->[0] . " = " . $propValue;
616            my $count = $sprout->GetCount(['Feature', 'HasProperty'], "HasProperty(to-link) = ?",
617                                          [$propItem->[2]]);
618            push @retVal, [$label, $count];
619        }
620        # Return the result.
621        return @retVal;
622    }
623    
624  =head3 GetFeatures  =head3 GetFeatures
625    
626  C<< my @features = GetFeatures($sprout, $genomeID); >>      my @features = GetFeatures($sprout, $genomeID);
627    
628  Return the features of the specified genome in the specified Sprout instance.  Return the features of the specified genome in the specified Sprout instance.
629    
# Line 252  Line 659 
659    
660  =head3 ShowLists  =head3 ShowLists
661    
662  C<< ShowLists($all, %lists); >>      my @htmlLines = ShowLists($cgi, %lists);
663    
664  Display a set of lists. Each list should consist of 2-tuples.  Display a set of lists. Each list should consist of 2-tuples, and the list
665    entries will be displayed as 2-element table rows with a header row.
666    
667  =over 4  =over 4
668    
669  =item all  =item cgi
670    
671  TRUE if details should be displayed; FALSE if only summaries should be displayed.  A CGI query object containing the options for this program. It is also used to format
672    HTML.
673    
674  =item lists  =item lists
675    
676  A hash mapping list names to list references.  A hash mapping list names to list references.
677    
678    =item RETURN
679    
680    Returns a list of HTML lines displaying the list in tabular form.
681    
682    =back
683    
684  =cut  =cut
685    
686  sub ShowLists {  sub ShowLists {
687      # Get the parameters.      # Get the parameters.
688      my $all = shift @_;      my $cgi = shift @_;
689      my %lists = @_;      my %lists = @_;
690        # Declare the return variable. The HTML lines will be accumulated
691        # in here and then joined with new-lines.
692        my @retVal = ();
693      # Loop through the lists in alphabetical order by list name.      # Loop through the lists in alphabetical order by list name.
694      for my $listName (sort keys %lists) {      for my $listName (sort keys %lists) {
695          # Get the list itself.          # Get the list itself.
# Line 280  Line 698 
698          my $listSize = scalar @{$list};          my $listSize = scalar @{$list};
699          # Only proceed if the list is nonempty.          # Only proceed if the list is nonempty.
700          if ($listSize > 0) {          if ($listSize > 0) {
701              my $header = ShowHeader($listName, $listSize);              my $header = ComputeHeader($listName, $listSize);
             print "$header\n";  
702              Trace($header) if T(3);              Trace($header) if T(3);
703              # If we're at trace level 3, display the list.              # Display the header line as a header.
704              if ($all) {              push @retVal, ShowTitle($cgi, $header);
705                  # Put a spacer under the title.              # Now display the list as table rows. Note we convert underbars to spaces
706                  print "\n";              # in the name row to make the table easier to fit into narrow places.
                 # Get the width of the name column.  
                 my $width = 0;  
                 for my $entryLen (map { length $_->[0] } @{$list}) {  
                     $width = $entryLen if $entryLen > $width;  
                 }  
                 # Now display the list.  
707                  for my $entry (@{$list}) {                  for my $entry (@{$list}) {
708                      my ($name, $data) = @{$entry};                      my ($name, $data) = @{$entry};
709                      print "    $name" . (" " x ($width - length $name)) . " $data\n";                  $name =~ tr/_/ /;
710                  }                  push @retVal, ShowDatum($cgi, $name => $data);
                 print "\n\n";  
711              }              }
712          }          }
713      }      }
714        # Return the list of HTML lines.
715        return @retVal;
716  }  }
717    
718  =head3 ShowHeader  =head3 ComputeHeader
719    
720  C<< my $header = ShowHeader($name, $count); >>      my $header = ComputeHeader($name, $count);
721    
722  Return a list header for a list of the specified length.  Return a list header for a list of the specified length.
723    
# Line 327  Line 739 
739    
740  =cut  =cut
741    
742  sub ShowHeader {  sub ComputeHeader {
743      # Get the parameters.      # Get the parameters.
744      my ($name, $count) = @_;      my ($name, $count) = @_;
745      # Declare the return variable.      # Declare the return variable.
746      my $retVal;      my $retVal;
747      if ($count == 0) {      if ($count == 0) {
748          $retVal = "*** $name: none";          $retVal = "$name: none";
749      } elsif ($count == 1) {      } elsif ($count == 1) {
750          $retVal = "*** $name: one";          $retVal = "$name: one";
751      } else {      } else {
752          $retVal = "*** $name: $count";          $retVal = "$name: $count";
753        }
754        # Return the result.
755        return $retVal;
756    }
757    
758    =head3 ShowCounts
759    
760        ShowCounts($sprout);
761    
762    Display general counts for the specified sprout instance. These counts are
763    used in progress reports.
764    
765    =over 4
766    
767    =item cgi
768    
769    CGI query object used to format the output.
770    
771    =item sprout
772    
773    Sprout instance for which counts are to be produced.
774    
775    =item RETURN
776    
777    Returns a list of HTML lines with the counts arranged in table rows.
778    
779    =back
780    
781    =cut
782    
783    sub ShowCounts {
784        # Get the parameters.
785        my ($cgi, $sprout) = @_;
786        # Count genomes and subsystems.
787        my $genomes = $sprout->GetCount(['Genome']);
788        my $subsystems = $sprout->GetCount(['Subsystem']);
789        # Count roles and external functional assignments.
790        my $roles = $sprout->GetCount(['OccursInSubsystem']);
791        my $funcs = $sprout->GetCount(['ExternalAliasFunc']);
792        # Count features.
793        my $features = $sprout->GetCount(['Feature']);
794        # Display the counts.
795        my @retVal = ();
796        push @retVal, ShowDatum($cgi, Genomes => $genomes);
797        push @retVal, ShowDatum($cgi, Subsystems => $subsystems);
798        push @retVal, ShowDatum($cgi, Roles => $roles);
799        push @retVal, ShowDatum($cgi, 'External function assignments', $funcs);
800        push @retVal, ShowDatum($cgi, Features => $features);
801        # Return the html.
802        return @retVal;
803      }      }
804    
805    =head3 ShowDatum
806    
807        my $htmlText = ShowDatum($cgi, $label, $value);
808    
809    Return a table row displaying the specified label and value.
810    
811    =over 4
812    
813    =item cgi
814    
815    CGI query object used to generate the HTML text.
816    
817    =item label
818    
819    Label to appear in the left cell of the table row.
820    
821    =item value
822    
823    Value to appear in the right cell of the table row.
824    
825    =item RETURN
826    
827    Returns the HTML for a single table row with the last cell right-aligned.
828    
829    =back
830    
831    =cut
832    
833    sub ShowDatum {
834        # Get the parameters.
835        my ($cgi, $label, $value) = @_;
836        # Create the table row.
837        my $retVal = $cgi->Tr($cgi->td($label), $cgi->td({align => 'right'}, $value));
838        # Return it.
839        return $retVal;
840    }
841    
842    =head3 ShowTitle
843    
844        my $html = ShowTitle($cgi, $title);
845    
846    Display a title line. This will be a merged table row with bolded text.
847    
848    =over 4
849    
850    =item cgi
851    
852    CGI query object used to generate HTML output.
853    
854    =item RETURN
855    
856    Returns the HTML text.
857    
858    =back
859    
860    =cut
861    
862    sub ShowTitle {
863        # Get the parameters.
864        my ($cgi, $title) = @_;
865        # Declare the return variable.
866        my $retVal = $cgi->Tr($cgi->th({colspan => 2, align => "center"}, $title));
867      # Return the result.      # Return the result.
868      return $retVal;      return $retVal;
869  }  }

Legend:
Removed from v.1.7  
changed lines
  Added in v.1.26

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3