[Bio] / Sprout / NewStuffCheck.pl Repository:
ViewVC logotype

Diff of /Sprout/NewStuffCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.12, Sat Sep 2 17:02:08 2006 UTC revision 1.26, Tue Feb 5 05:15:42 2008 UTC
# Line 3  Line 3 
3  =head1 New Stuff Checker  =head1 New Stuff Checker
4    
5  This script compares the genomes, features, and annotations in  This script compares the genomes, features, and annotations in
6  the old and new sprouts and lists the differences.  the old and new sprouts and lists the differences and produces
7    a report in HTML. The output is an HTML fragment, not an entire
8    web page. This is because we expect it to be included in another
9    page.
10    
11  The currently-supported command-line options for NewStuffCheck are as follows.  The currently-supported command-line options for NewStuffCheck are as follows.
12    
13  =over 4  =over 4
14    
 =item summary  
   
 Do not display details, only difference summaries.  
   
 =item nofeats  
   
 Do not process features.  
   
15  =item user  =item user
16    
17  Name suffix to be used for log files. If omitted, the PID is used.  Name suffix to be used for log files. If omitted, the PID is used.
# Line 52  Line 47 
47  Name of the group file (described below). The default is C<groups.tbl>  Name of the group file (described below). The default is C<groups.tbl>
48  in the Sprout data directory.  in the Sprout data directory.
49    
50    =item outFile
51    
52    Output file name. The default is C<html/includes/diff.inc> in the
53    nmpdr C<next> directory.
54    
55    =item orgFile
56    
57    Output file for the genome report. The default is C<html/includes/genomes.inc> in
58    the nmpdr C<next> directory.
59    
60  =back  =back
61    
62  =head2 The Group File  =head2 The Group File
# Line 77  Line 82 
82  =item species  =item species
83    
84  Species of the group, or an empty string if the group is for an entire  Species of the group, or an empty string if the group is for an entire
85  genus.  genus. If the group contains more than one species, the species names
86    should be separated by commas.
87    
88  =back  =back
89    
# Line 85  Line 91 
91    
92  use strict;  use strict;
93  use Tracer;  use Tracer;
 use DocUtils;  
 use TestUtils;  
94  use Cwd;  use Cwd;
95  use File::Copy;  use File::Copy;
96  use File::Path;  use File::Path;
97  use FIG;  use FIG;
98  use SFXlate;  use SFXlate;
99  use Sprout;  use Sprout;
100    use CGI;
101    use FIGRules;
102    
103  # Get the command-line options and parameters.  # Get the command-line options and parameters.
104  my ($options, @parameters) = StandardSetup([qw(Sprout) ],  my ($options, @parameters) = StandardSetup([qw(Sprout) ],
105                                             {                                             {
106                                                groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"],                                                groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"],
107                                                nofeats => ["", "if specified, only genome changes will be displayed; otherwise, genome features will be compared and differences shown"],                                                trace => ["2", "tracing level; use a minus to prevent tracing to standard output"],
                                               trace => ["2-", "tracing level; use a minus to prevent tracing to standard output"],  
                                               summary => ["", "if specified, detailed lists of the different items will not be displayed"],  
108                                                phone => ["", "phone number (international format) to call when load finishes"],                                                phone => ["", "phone number (international format) to call when load finishes"],
109                                                  outFile => ["$FIG_Config::nmpdr_base/next/html/includes/diff.inc", "output file for the difference report"],
110                                                  orgFile => ["$FIG_Config::nmpdr_base/next/html/includes/genomes.inc", "output file for the genome report"],
111                                             },                                             },
112                                             "",                                             "",
113                                             @ARGV);                                             @ARGV);
# Line 109  Line 115 
115  my $rtype;  my $rtype;
116  # Insure we catch errors.  # Insure we catch errors.
117  eval {  eval {
118        # Get a CGI object for building the output. We pass it the options hash so
119        # the formatting subroutines have access to it. Also, we want it to know
120        # we're not a real web script.
121        my $cgi = CGI->new($options);
122        # Start accumulating HTML data.
123        my @html = ();
124        # Open the output file. We do this early in case there's a problem.
125        my $outFileName = $options->{outFile};
126        Trace("Opening output file $outFileName.") if T(2);
127        Open(\*OUTPUT, ">$outFileName");
128        # Get a nice-looking version name and make it into a title.
129        my $version = uc $FIG_Config::nmpdr_version;
130        $version =~ tr/_/ /;
131        push @html, $cgi->h4({align => "center"}, "Difference Report for $version");
132        # Start the table.
133        push @html, $cgi->start_table({align => "center", border => "2"});
134      # Get the group file.      # Get the group file.
135      my $groupFileName = $options->{groupFile};      my $groupFileName = $options->{groupFile};
136      Trace("Reading group file $groupFileName.") if T(2);      Trace("Reading group file $groupFileName.") if T(2);
     my @groupLines = Tracer::GetFile($groupFileName);  
     # Well put each group's data into a hash, keyed by group  
     # name, each entry being a 3-tuple of page name, genus,  
     # and species  
     my %groups;  
     for my $groupLine (@groupLines) {  
         my ($name, $page, $genus, $species) = split(/\t/, $groupLine);  
         $groups{$name} = [$page, $genus, $species];  
     }  
137      Trace("Processing genomes.") if T(2);      Trace("Processing genomes.") if T(2);
138      # Get the current SEED.      # Get the current SEED.
139      my $fig = FIG->new();      my $fig = FIG->new();
140      # Get the old Sprout.      # Get the old Sprout.
141      my $oldSprout = SFXlate->new_sprout_only($FIG_Config::oldSproutDB);      my $oldSprout = SFXlate->old_sprout_only();
142      # Get its genomes in alphabetical order.      # Get its genomes in alphabetical order.
143      my @oldGenomes = GetGenomes($oldSprout);      my @oldGenomes = GetGenomes($oldSprout);
144      # Get the new Sprout.      # Get the new Sprout.
# Line 134  Line 147 
147      my @newGenomes = GetGenomes($newSprout);      my @newGenomes = GetGenomes($newSprout);
148      # Compare the two genomes lists.      # Compare the two genomes lists.
149      my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes);      my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes);
150      # Add feature counts to the new genomes.      # Get the super-group data.
151        my %superTable = $newSprout->CheckGroupFile();
152        # Create a list for the new genomes that includes BBH and feature counts. We'll flip this
153        # lists so that the genome names are first and the IDs second.
154        my @insertedGenomeList = ();
155      for my $insertedGenome (@{$insertedGenomes}) {      for my $insertedGenome (@{$insertedGenomes}) {
156          my $genomeID = $insertedGenome->[0];          my $genomeID = $insertedGenome->[0];
157          # For a new genome, display the feature count.          # For a new genome, display the feature and BBH counts.
158          my $count = $newSprout->GetCount(['HasFeature'], "HasFeature(from-link) = ?",          my $count = $newSprout->GetCount(['HasFeature'], "HasFeature(from-link) = ?",
159                                           [$genomeID]);                                           [$genomeID]);
160          my $suffix = ($count == 1 ? " one feature" : "$count features");          my $suffix = ($count == 1 ? " one feature" : "$count features");
161          $insertedGenome->[1] .= "($suffix)";          my $bbhCount = FIGRules::BatchBBHs("fig|$genomeID.%", 1e-10);
162      }          $suffix .= "; " . ($bbhCount == 1 ? "one BBH" : "$bbhCount BBHs");
163      # Add information about SEED status to the deleted genomes.          push @insertedGenomeList, [$insertedGenome->[1], "$genomeID ($suffix)"];
164        }
165        # Create a list for the deleted genomes that contains information about SEED status.
166        # This list is flipped, too.
167        my @deletedGenomeList = ();
168      for my $deletedGenome (@{$deletedGenomes}) {      for my $deletedGenome (@{$deletedGenomes}) {
169          my $genomeID = $deletedGenome->[0];          my $genomeID = $deletedGenome->[0];
170            my $suffix = "";
171          if ($fig->is_genome($genomeID)) {          if ($fig->is_genome($genomeID)) {
172                # Here the deleted genome is still in the SEED.
173              my $complete = ($fig->is_complete($genomeID) ? "complete" : "incomplete");              my $complete = ($fig->is_complete($genomeID) ? "complete" : "incomplete");
174              $deletedGenome->[1] .= "(still in SEED, $complete)";              $suffix = " (still in SEED, $complete)";
175            } else {
176                # It's not in the SEED. See if it has been replaced.
177                my ($genus, $species, $strain) = $oldSprout->GetGenomeNameData($genomeID);
178                my @genomeIDs = $newSprout->GetGenomeByNameData($genus, $species, $strain);
179                if (scalar @genomeIDs) {
180                    $suffix = " (replaced)";
181          }          }
182      }      }
183            push @deletedGenomeList, [$deletedGenome->[1], "$genomeID$suffix"];
184        }
185      # Display the lists.      # Display the lists.
186      ShowLists(! $options->{summary},      push @html, ShowLists($cgi, 'New Genomes'     => \@insertedGenomeList,
187                'New Genomes'     => $insertedGenomes,                                  'Deleted Genomes' => \@deletedGenomeList);
               'Deleted Genomes' => $deletedGenomes);  
188      # Now the groups.      # Now the groups.
189      Trace("Comparing groups.") if T(2);      Trace("Comparing groups.") if T(2);
190      my %oldGroups = $oldSprout->GetGroups();      my %oldGroups = $oldSprout->GetGroups();
# Line 166  Line 196 
196          if (! exists $oldGroups{$newGroup}) {          if (! exists $oldGroups{$newGroup}) {
197              # Construct a list of this group's genes.              # Construct a list of this group's genes.
198              my @groupGenomes = NameGenomes($newSprout, $newGroups{$newGroup});              my @groupGenomes = NameGenomes($newSprout, $newGroups{$newGroup});
199              ShowLists(! $options->{summary}, "Genomes in new group $newGroup" => \@groupGenomes);              push @html, ShowLists($cgi, "Genomes in new group $newGroup" => \@groupGenomes);
200          } else {          } else {
201              # Here the group is in both versions. Fix the lists and compare them.              # Here the group is in both versions. Fix the lists and compare them. Note that we'll be comparing
202              my @newGroupList = NameGenomes($newSprout, $newGroups{$newGroup});              # on the genome ID, which will become the second list element after the call to NameGenomes.
203              my @oldGroupList = NameGenomes($oldSprout, $oldGroups{$newGroup});              my @newGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($newSprout, $newGroups{$newGroup});
204                my @oldGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($oldSprout, $oldGroups{$newGroup});
205              Trace("Comparing lists for $newGroup.") if T(4);              Trace("Comparing lists for $newGroup.") if T(4);
206              my ($insertedGroupGenomes, $deletedGroupGenomes) = Tracer::CompareLists(\@newGroupList, \@oldGroupList);              my ($insertedGroupGenomes, $deletedGroupGenomes) = Tracer::CompareLists(\@newGroupList, \@oldGroupList, 1);
207              Trace("Comparison complete.") if T(4);              Trace("Comparison complete.") if T(4);
208              # Delete the old group data. When we're done, this means we'll have a list of the deleted              # Delete the old group data. When we're done, this means the hash
209              # groups.              # will contain only the deleted groups.
210              delete $oldGroups{$newGroup};              delete $oldGroups{$newGroup};
211              # Show the lists. Empty lists will not be shown.              # Show the lists. Empty lists will not be shown.
212              Trace("Displaying group lists.") if T(4);              Trace("Displaying group lists.") if T(4);
213              ShowLists(! $options->{summary},              push @html, ShowLists($cgi, "Genomes new to $newGroup"       => $insertedGroupGenomes,
                       "Genomes new to $newGroup" => $insertedGroupGenomes,  
214                        "Genomes no longer in $newGroup" => $deletedGroupGenomes);                        "Genomes no longer in $newGroup" => $deletedGroupGenomes);
215          }          }
216      }      }
# Line 189  Line 219 
219      for my $oldGroup (sort keys %oldGroups) {      for my $oldGroup (sort keys %oldGroups) {
220          Trace("Processing deleted group $oldGroup.") if T(3);          Trace("Processing deleted group $oldGroup.") if T(3);
221          my @groupGenomes = NameGenomes($oldSprout, $oldGroups{$oldGroup});          my @groupGenomes = NameGenomes($oldSprout, $oldGroups{$oldGroup});
222          ShowLists(! $options->{summary}, "Genomes in deleted group $oldGroup" => \@groupGenomes);          push @html, ShowLists($cgi, "Genomes in deleted group $oldGroup" => \@groupGenomes);
223      }      }
224      # Next, we get the subsystems.      # Next, we get the subsystems.
225      Trace("Processing subsystems.") if T(2);      Trace("Processing subsystems.") if T(2);
# Line 208  Line 238 
238              }              }
239          }          }
240      }      }
241      ShowLists(! $options->{summary},      push @html, ShowLists($cgi, 'New Subsystems'     => $insertedSubs,
               'New Subsystems'     => $insertedSubs,  
242                'Deleted Subsystems' => $deletedSubs);                'Deleted Subsystems' => $deletedSubs);
243        # Print what we've done so far.
244        FlushData(\*OUTPUT, \@html);
245        # Now we need to process some statistics that require looping through all the
246        # features in the new sprout. While we're at it, we'll collect the BBH and
247        # coupling counts.
248        my $bbhCount = 0;
249        my $couplingCount = 0;
250        # We'll accumulate a report of genomes with missing BBHs in here.
251        my @bbhMissingGenomes = ();
252        # One of the reports is only for genomes common to both sprouts. To help us
253        # make this determination, we get a hash of the inserted genomes.
254        my %skipGenomes = map { $_->[0] => 1 } @{$insertedGenomes};
255        # Open the organism report file.
256        Open(\*ORGOUT, ">$options->{orgFile}");
257        # Start the table.
258        my @orgHtml = ();
259        push @orgHtml, $cgi->h4({ align => 'center' }, "Genome Report for $version");
260        push @orgHtml, $cgi->start_table({ border => 2, align => 'center'});
261        push @orgHtml, $cgi->Tr($cgi->th("Genome"),
262                                $cgi->th({align => 'right'}, ["Size (bp)", "Feats", "Contigs", "Subs",
263                                                              "F In SS", "PEGs", "RNAs", "PPs", "New", "Del"]));
264        FlushData(\*ORGOUT, \@orgHtml);
265        # Now we start the loop. Note that "newGenomes" means all the genomes in the new Sprout,
266        # not the list of genomes that are new!
267        for my $genomeData (@newGenomes) {
268            # Get this genome's ID and name.
269            my $genomeID = $genomeData->[0];
270            # Create a title for it.
271            my $genomeTitle = "$genomeID: $genomeData->[1]";
272            # Compute its size.
273            my $genomeSize = $newSprout->GenomeLength($genomeID);
274            # Get a list of the genomes in the new Sprout that are not this one.
275            my @otherGenomes = grep { $_ ne $genomeID } map { $_->[0] } @newGenomes;
276            Trace("Computing BBH matrix for $genomeID.") if T(3);
277            # Get the bbh matrix going from the current genome to the others.
278            my %matrix = $newSprout->BBHMatrix($genomeID, 1e-20, @otherGenomes);
279            # Set up the subsystem hash. This will be used to track which subsystems are used by
280            # the genome's features.
281            my %subHash = ();
282            # Set up the contig hash. This will be used to track which contigs are used by the
283            # genome's features.
284            my %contigHash = ();
285            # Set up a counter hash for feature types.
286            my %counters = (peg => 0, in_ss => 0, rna => 0, pp => 0, total => 0);
287            # We'll store information about the genome's features (ID and functional role) in here.
288            my @newFeatures = ();
289            # Finally, we'll use this flag to warn us if there are no BBHs.
290            my $bbhsFound = 0;
291            # Loop through the genome's features. The order is important here, because we need
292            # to match the order used by "GetFeatures" for the feature difference comparison.
293            Trace("Processing feature statistics for $genomeID.") if T(3);
294            my $fquery = $newSprout->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
295                                         [$genomeID]);
296            # Loop through the features, updating the counts.
297            while (my $feature = $fquery->Fetch()) {
298                # Update the total feature count.
299                $counters{total}++;
300                Trace("$counters{total} features processed for $genomeID.") if T(3) && ($counters{total} % 500 == 0);
301                # Get the feature ID and role.
302                my $fid = $feature->PrimaryValue('Feature(id)');
303                push @newFeatures, [$fid, $feature->PrimaryValue('Feature(assignment)')];
304                # Check to see if we have BBH data.
305                if (exists $matrix{$fid}) {
306                    my $fidBbhCount = scalar keys %{$matrix{$fid}};
307                    if ($fidBbhCount > 0) {
308                        # Denote that this feature has BBHs.
309                        $bbhsFound = 1;
310                        # Add them to the total BBH count.
311                        $bbhCount += $fidBbhCount;
312                    }
313                }
314                # Ask for couplings.
315                my %coupleHash = $newSprout->CoupledFeatures($fid);
316                $couplingCount += keys %coupleHash;
317                # See if this feature is in a subsystem.
318                my %subs = $newSprout->SubsystemsOf($fid);
319                if (keys %subs) {
320                    $counters{in_ss}++;
321                    for my $sub (keys %subs) {
322                        $subHash{$sub} = 1;
323                    }
324                }
325                # Increment the feature type counter.
326                $counters{$feature->PrimaryValue('Feature(feature-type)')}++;
327                # Insure we've tracked this feature's contigs.
328                my @locations = split /\s*,\s*/, $feature->PrimaryValue('Feature(location-string)');
329                for my $loc (@locations) {
330                    my $locObject = BasicLocation->new($loc);
331                    $contigHash{$locObject->Contig} = 1;
332                }
333            }
334            Trace("Feature data compiled for $genomeID.") if T(3);
335            # The last thing we need to do is compute the number of features added or deleted.
336            # This goes in the genome report, but it's only meaningful for common genomes.
337            my ($addCount, $delCount) = ("","");
338            if (! $skipGenomes{$genomeID}) {
339                # Get the old features.
340                my @oldFeatures = GetFeatures($oldSprout, $genomeID);
341                Trace("Comparing features for $genomeID.") if T(3);
342                # Compare the lists.
343                my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);
344                $addCount = scalar(@{$insertedFeatures});
345                $delCount = scalar(@{$deletedFeatures});
346            }
347            # Check to see if this genome is missing its BBHs.
348            if (! $bbhsFound) {
349                # It is, so add a line for it to the missing-BBH list.
350                push @bbhMissingGenomes, ShowDatum($cgi, $genomeData->[1], $genomeID);
351            }
352            push @orgHtml, $cgi->Tr($cgi->td($genomeTitle),
353                                    $cgi->td({align => 'right'}, [$genomeSize, $counters{total}, scalar(keys %contigHash),
354                                                                  scalar(keys %subHash), $counters{in_ss}, $counters{peg},
355                                                                  $counters{rna}, $counters{pp}, $addCount, $delCount]));
356            FlushData(\*ORGOUT, \@orgHtml);
357        }
358        # Close the table for the genome report.
359        push @orgHtml, $cgi->end_table();
360        FlushData(\*ORGOUT, \@orgHtml);
361        close ORGOUT;
362        # Check for a missing-BBH report.
363        if (scalar @bbhMissingGenomes) {
364            # There is a report, so put it into the output stream.
365            push @html, ShowTitle($cgi, "Genomes without BBHs");
366            push @html, @bbhMissingGenomes;
367        }
368        # Flush the genome feature comparison data and the missing-BBH report (if any).
369        FlushData(\*OUTPUT, \@html);
370      # Next, we show some basic counts.      # Next, we show some basic counts.
371      print "\nStatistics for old Sprout\n\n";      Trace("Displaying counts.") if T(3);
372      ShowCounts($oldSprout);      push @html, ShowTitle($cgi, "Statistics for old Sprout");
373      print "\nStatistics for new Sprout\n\n";      push @html, ShowCounts($cgi, $oldSprout);
374      ShowCounts($newSprout);      push @html, ShowTitle($cgi, "Statistics for new Sprout");
375        push @html, ShowCounts($cgi, $newSprout);
376        push @html, ShowDatum($cgi, BBHs => $bbhCount);
377        push @html, ShowDatum($cgi, "Functional Couplings", $couplingCount);
378        FlushData(\*OUTPUT, \@html);
379      # Now we show the genomes that are not in groups but could be. First, we convert      # Now we show the genomes that are not in groups but could be. First, we convert
380      # our group hash from the new Sprout into the form used on the web site.      # our group hash from the new Sprout into the form used on the web site.
381      Trace("Examining possible missing genomes in groups.") if T(2);      Trace("Examining possible missing genomes in groups.") if T(2);
382      my %fixedGroups = Sprout::Fix(%newGroups);      my %fixedGroups = $newSprout->Fix(%newGroups);
383      for my $group (sort keys %groups) {      for my $group (sort keys %superTable) {
384          Trace("Checking group $group.");          Trace("Checking group $group.");
385          # Get this group's genus and species.          # Loop through this group's genus/species pairs creating filters
386          my $genus = $groups{$group}->[1];          # for a genome query.
387          my $species = $groups{$group}->[2];          my @filters = ();
388          # Get a hash of the genomes already in it.          my @filterParms = ();
389          my %inGroup = map { $_ => 1 } @{$fixedGroups{$group}};          for my $genusSpecies (@{$superTable{$group}->{content}}) {
390          # Get a list of its possible genomes.              my ($genus, $species) = @{$genusSpecies};
391                # Filter on genus.
392          my $filter = 'Genome(genus) = ?';          my $filter = 'Genome(genus) = ?';
393          my @parms = ($genus);              push @filterParms, $genus;
394                # If necessary, filter on species.
395          if ($species) {          if ($species) {
396              $filter .= ' AND Genome(species) = ?';              $filter .= ' AND Genome(species) = ?';
397              push @parms, $species;                  push @filterParms, $species;
398          }          }
399          my @possibles = $newSprout->GetFlat(['Genome'], $filter, \@parms, 'Genome(id)');              # Add this filter to the list.
400                push @filters, "($filter)";
401            }
402            # Get all the genomes that should be in the super-group.
403            my @possibles = $newSprout->GetFlat(['Genome'], join(" OR ", @filters),
404                                                \@filterParms, 'Genome(id)');
405            # Get a hash of the genomes already in it.
406            my %inGroup = map { $_ => 1 } @{$fixedGroups{$group}};
407          # Get the possibles that aren't in the group and add identifying information.          # Get the possibles that aren't in the group and add identifying information.
408          my @leftOut = NameGenomes($newSprout, [ grep { ! exists $inGroup{$_} } @possibles ]);          my @leftOut = NameGenomes($newSprout, [ grep { ! exists $inGroup{$_} } @possibles ]);
409          # If anything survived, show the list.          # If anything survived, show the list.
410          if (@leftOut) {          if (@leftOut) {
411              ShowLists(! $options->{summary}, "Candidates for $group" => \@leftOut);              push @html, ShowLists($cgi, "Candidates for $group" => \@leftOut);
         }  
     }  
     # Compare the property tables.  
     Trace("Comparing properties.") if T(2);  
     # Set up lists of all the properties in each sprout.  
     my @oldProps = GetProperties($oldSprout);  
     my @newProps = GetProperties($newSprout);  
     # Compare the lists.  
     my ($insertedProps, $deletedProps) = Tracer::CompareLists(\@oldProps, \@newProps);  
     # Now get all the properties in the new Sprout without any features.  
     my @emptyProps = grep { $_->[1] == 0 } @newProps;  
     # Show what we've found.  
     ShowLists(! $options->{summary}, 'New Properties' => $insertedProps,  
                                      'Deleted Properties' => $deletedProps,  
                                      'Empty Properties' => \@emptyProps);  
     # Now we process the features of the common genes.  
     if (! $options->{nofeats}) {  
         # First we need a hash of the inserted stuff so we know to skip it.  
         my %skipGenomes = map { $_->[0] => 1 } @{$insertedGenomes};  
         # Loop through the genomees.  
         for my $genome (@newGenomes) {  
             # Get the ID and name.  
             my ($genomeID, $genomeName) = @{$genome};  
             Trace("Processing $genomeID.") if T(3);  
             # Only process the common genes.  
             if (! $skipGenomes{$genomeID}) {  
                 # Compare the genome group information.  
                 # Get the new and old features. This will be very stressful to the machine,  
                 # because there are lots of features.  
                 my @oldFeatures = GetFeatures($oldSprout, $genomeID);  
                 my @newFeatures = GetFeatures($newSprout, $genomeID);  
                 Trace("Comparing features for $genomeID.") if T(3);  
                 # Compare the lists.  
                 my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);  
                 # Display the lists. Only nonempty lists are displayed; however, we do a check  
                 # first anyway so the trace tells us what's happening.  
                 if (scalar @{$insertedFeatures} + scalar @{$deletedFeatures} > 0) {  
                     Trace("Displaying feature differences.") if T(3);  
                     ShowLists(! $options->{summary},  
                               "New Features for $genomeID"      => $insertedFeatures,  
                               "Features Deleted from $genomeID" => $deletedFeatures);  
                 }  
             }  
412          }          }
413      }      }
414        FlushData(\*OUTPUT, \@html);
415        # Close the table.
416        push @html, $cgi->end_table();
417        # Flush the last of the HTML.
418        FlushData(\*OUTPUT, \@html);
419        # Close the output file.
420        close OUTPUT;
421        Trace("Analysis complete.") if T(2);
422  };  };
423  if ($@) {  if ($@) {
424      Trace("Script failed with error: $@") if T(0);      Trace("Script failed with error: $@") if T(0);
# Line 302  Line 436 
436      }      }
437  }  }
438    
439    =head3 FlushData
440    
441        FlushData($handle, \@lines);
442    
443    Write the specified lines to the output file and clear them out of the list. This
444    method is called periodically so that even if something goes wrong we can still
445    see the data accumulating in the output file. The key aspect here is that we
446    put new-line characters after each line written and show something in the trace
447    log.
448    
449    =over 4
450    
451    =item handle
452    
453    Output handle to which the lines should be written.
454    
455    =item lines
456    
457    Reference to a list of output lines. The output lines will be written to the output
458    handle and then removed from the list.
459    
460    =back
461    
462    =cut
463    
464    sub FlushData {
465        # Get the parameters.
466        my ($handle, $lines) = @_;
467        Trace("Flushing " . scalar(@{$lines}) . " lines to output file.") if T(3);
468        # Write the lines.
469        print $handle join("\n", @{$lines});
470        # Write a terminating new-line.
471        print $handle "\n";
472        # Clear the list.
473        splice @{$lines};
474    }
475    
476  =head3 GetGenomes  =head3 GetGenomes
477    
478  C<< my @geneList = GetGenomes($sprout); >>      my @geneList = GetGenomes($sprout);
479    
480  Return a list of the genomes in the specified Sprout instance. The genomes  Return a list of the genomes in the specified Sprout instance. The genomes
481  are returned in alphabetical order by genome ID.  are returned in alphabetical order by genome ID.
# Line 340  Line 511 
511    
512  =head3 NameGenomes  =head3 NameGenomes
513    
514  C<< my @newList = NameGenomes($sprout, \@genomes); >>      my @newList = NameGenomes($sprout, \@genomes);
515    
516  Convert a list of genome IDs to a list of genome IDs with names.  Convert a list of genome IDs to a list of genome IDs with names.
517    
# Line 367  Line 538 
538      # Get the parameters.      # Get the parameters.
539      my ($sprout, $genomes) = @_;      my ($sprout, $genomes) = @_;
540      # Attach the names.      # Attach the names.
541      my @retVal = map { [$_, $sprout->GenusSpecies($_) ] } @{$genomes};      my @retVal = map { [$sprout->GenusSpecies($_), $_ ] } @{$genomes};
542      # Return the result.      # Return the result.
543      return @retVal;      return @retVal;
544  }  }
545    
546  =head3 GetSubsystems  =head3 GetSubsystems
547    
548  C<< my @subsystems = GetSubsystems($sprout); >>      my @subsystems = GetSubsystems($sprout);
549    
550  Get a list of the subsystems in the specified Sprout instance.  Get a list of the subsystems in the specified Sprout instance.
551    
# Line 405  Line 576 
576    
577  =head3 GetProperties  =head3 GetProperties
578    
579  C<< my @propertyList = GetProperties($sprout); >>      my @propertyList = GetProperties($sprout);
580    
581  Return a list of properties. Each element in the list will be a 2-tuple containing  Return a list of properties. Each element in the list will be a 2-tuple containing
582  the property name and value in the first column and its ID in the second column.  the property name and value in the first column and its ID in the second column.
# Line 437  Line 608 
608      # Combine the property names and values and replace each property ID by a feature count.      # Combine the property names and values and replace each property ID by a feature count.
609      my @retVal;      my @retVal;
610      for my $propItem (@props) {      for my $propItem (@props) {
611          my $label = $propItem->[0] . " = " . $propItem->[1];          # Split up the value on punctuation boundaries for readability.
612            my $propValue = $propItem->[1];
613            $propValue =~ s/::/ :: /g;
614            $propValue =~ s/([,;])(\S)/$1 $2/g;
615            my $label = $propItem->[0] . " = " . $propValue;
616          my $count = $sprout->GetCount(['Feature', 'HasProperty'], "HasProperty(to-link) = ?",          my $count = $sprout->GetCount(['Feature', 'HasProperty'], "HasProperty(to-link) = ?",
617                                        [$propItem->[2]]);                                        [$propItem->[2]]);
618          push @retVal, [$label, $count];          push @retVal, [$label, $count];
# Line 448  Line 623 
623    
624  =head3 GetFeatures  =head3 GetFeatures
625    
626  C<< my @features = GetFeatures($sprout, $genomeID); >>      my @features = GetFeatures($sprout, $genomeID);
627    
628  Return the features of the specified genome in the specified Sprout instance.  Return the features of the specified genome in the specified Sprout instance.
629    
# Line 484  Line 659 
659    
660  =head3 ShowLists  =head3 ShowLists
661    
662  C<< ShowLists($all, %lists); >>      my @htmlLines = ShowLists($cgi, %lists);
663    
664  Display a set of lists. Each list should consist of 2-tuples.  Display a set of lists. Each list should consist of 2-tuples, and the list
665    entries will be displayed as 2-element table rows with a header row.
666    
667  =over 4  =over 4
668    
669  =item all  =item cgi
670    
671  TRUE if details should be displayed; FALSE if only summaries should be displayed.  A CGI query object containing the options for this program. It is also used to format
672    HTML.
673    
674  =item lists  =item lists
675    
676  A hash mapping list names to list references.  A hash mapping list names to list references.
677    
678    =item RETURN
679    
680    Returns a list of HTML lines displaying the list in tabular form.
681    
682    =back
683    
684  =cut  =cut
685    
686  sub ShowLists {  sub ShowLists {
687      # Get the parameters.      # Get the parameters.
688      my $all = shift @_;      my $cgi = shift @_;
689      my %lists = @_;      my %lists = @_;
690        # Declare the return variable. The HTML lines will be accumulated
691        # in here and then joined with new-lines.
692        my @retVal = ();
693      # Loop through the lists in alphabetical order by list name.      # Loop through the lists in alphabetical order by list name.
694      for my $listName (sort keys %lists) {      for my $listName (sort keys %lists) {
695          # Get the list itself.          # Get the list itself.
# Line 512  Line 698 
698          my $listSize = scalar @{$list};          my $listSize = scalar @{$list};
699          # Only proceed if the list is nonempty.          # Only proceed if the list is nonempty.
700          if ($listSize > 0) {          if ($listSize > 0) {
701              my $header = ShowHeader($listName, $listSize);              my $header = ComputeHeader($listName, $listSize);
             print "$header\n";  
702              Trace($header) if T(3);              Trace($header) if T(3);
703              # If we're at trace level 3, display the list.              # Display the header line as a header.
704              if ($all) {              push @retVal, ShowTitle($cgi, $header);
705                  # Put a spacer under the title.              # Now display the list as table rows. Note we convert underbars to spaces
706                  print "\n";              # in the name row to make the table easier to fit into narrow places.
                 # Get the width of the name column.  
                 my $width = 0;  
                 for my $entryLen (map { length $_->[0] } @{$list}) {  
                     $width = $entryLen if $entryLen > $width;  
                 }  
                 # Now display the list.  
707                  for my $entry (@{$list}) {                  for my $entry (@{$list}) {
708                      my ($name, $data) = @{$entry};                      my ($name, $data) = @{$entry};
709                      print "    $name" . (" " x ($width - length $name)) . " $data\n";                  $name =~ tr/_/ /;
710                  }                  push @retVal, ShowDatum($cgi, $name => $data);
                 print "\n\n";  
711              }              }
712          }          }
713      }      }
714        # Return the list of HTML lines.
715        return @retVal;
716  }  }
717    
718  =head3 ShowHeader  =head3 ComputeHeader
719    
720  C<< my $header = ShowHeader($name, $count); >>      my $header = ComputeHeader($name, $count);
721    
722  Return a list header for a list of the specified length.  Return a list header for a list of the specified length.
723    
# Line 559  Line 739 
739    
740  =cut  =cut
741    
742  sub ShowHeader {  sub ComputeHeader {
743      # Get the parameters.      # Get the parameters.
744      my ($name, $count) = @_;      my ($name, $count) = @_;
745      # Declare the return variable.      # Declare the return variable.
746      my $retVal;      my $retVal;
747      if ($count == 0) {      if ($count == 0) {
748          $retVal = "*** $name: none";          $retVal = "$name: none";
749      } elsif ($count == 1) {      } elsif ($count == 1) {
750          $retVal = "*** $name: one";          $retVal = "$name: one";
751      } else {      } else {
752          $retVal = "*** $name: $count";          $retVal = "$name: $count";
753      }      }
754      # Return the result.      # Return the result.
755      return $retVal;      return $retVal;
# Line 577  Line 757 
757    
758  =head3 ShowCounts  =head3 ShowCounts
759    
760  C<< ShowCounts($sprout); >>      ShowCounts($sprout);
761    
762  Display general counts for the specified sprout instance. These counts are  Display general counts for the specified sprout instance. These counts are
763  used in progress reports.  used in progress reports.
764    
765  =over 4  =over 4
766    
767    =item cgi
768    
769    CGI query object used to format the output.
770    
771  =item sprout  =item sprout
772    
773  Sprout instance for which counts are to be produced.  Sprout instance for which counts are to be produced.
774    
775    =item RETURN
776    
777    Returns a list of HTML lines with the counts arranged in table rows.
778    
779  =back  =back
780    
781  =cut  =cut
782    
783  sub ShowCounts {  sub ShowCounts {
784      # Get the parameters.      # Get the parameters.
785      my ($sprout) = @_;      my ($cgi, $sprout) = @_;
786      # Count genomes and subsystems.      # Count genomes and subsystems.
787      my $genomes = $sprout->GetCount(['Genome']);      my $genomes = $sprout->GetCount(['Genome']);
788      my $subsystems = $sprout->GetCount(['Subsystem']);      my $subsystems = $sprout->GetCount(['Subsystem']);
789      # Count roles, external functional assignments, and functional couplings.      # Count roles and external functional assignments.
790      my $roles = $sprout->GetCount(['OccursInSubsystem']);      my $roles = $sprout->GetCount(['OccursInSubsystem']);
791      my $funcs = $sprout->GetCount(['ExternalAliasFunc']);      my $funcs = $sprout->GetCount(['ExternalAliasFunc']);
792      my $couples = $sprout->GetCount(['Coupling']);      # Count features.
     # Count features and BBHs.  
     my $bbhs = int ($sprout->GetCount(['IsBidirectionalBestHitOf']) / 2);  
793      my $features = $sprout->GetCount(['Feature']);      my $features = $sprout->GetCount(['Feature']);
794      # Display the counts.      # Display the counts.
795      print "Genomes = $genomes.\n";      my @retVal = ();
796      print "Subsystems = $subsystems.\n";      push @retVal, ShowDatum($cgi, Genomes => $genomes);
797      print "Roles = $roles.\n";      push @retVal, ShowDatum($cgi, Subsystems => $subsystems);
798      print "External function assignments = $funcs.\n";      push @retVal, ShowDatum($cgi, Roles => $roles);
799      print "Features = $features.\n";      push @retVal, ShowDatum($cgi, 'External function assignments', $funcs);
800      print "Bidirectional Best Hits = $bbhs.\n";      push @retVal, ShowDatum($cgi, Features => $features);
801      print "Functional couplings = $couples.\n";      # Return the html.
802      print "\n";      return @retVal;
803  }  }
804    
805    =head3 ShowDatum
806    
807        my $htmlText = ShowDatum($cgi, $label, $value);
808    
809    Return a table row displaying the specified label and value.
810    
811    =over 4
812    
813    =item cgi
814    
815    CGI query object used to generate the HTML text.
816    
817    =item label
818    
819    Label to appear in the left cell of the table row.
820    
821    =item value
822    
823    Value to appear in the right cell of the table row.
824    
825    =item RETURN
826    
827    Returns the HTML for a single table row with the last cell right-aligned.
828    
829    =back
830    
831    =cut
832    
833    sub ShowDatum {
834        # Get the parameters.
835        my ($cgi, $label, $value) = @_;
836        # Create the table row.
837        my $retVal = $cgi->Tr($cgi->td($label), $cgi->td({align => 'right'}, $value));
838        # Return it.
839        return $retVal;
840    }
841    
842    =head3 ShowTitle
843    
844        my $html = ShowTitle($cgi, $title);
845    
846    Display a title line. This will be a merged table row with bolded text.
847    
848    =over 4
849    
850    =item cgi
851    
852    CGI query object used to generate HTML output.
853    
854    =item RETURN
855    
856    Returns the HTML text.
857    
858    =back
859    
860    =cut
861    
862    sub ShowTitle {
863        # Get the parameters.
864        my ($cgi, $title) = @_;
865        # Declare the return variable.
866        my $retVal = $cgi->Tr($cgi->th({colspan => 2, align => "center"}, $title));
867        # Return the result.
868        return $retVal;
869    }
870    
871  1;  1;

Legend:
Removed from v.1.12  
changed lines
  Added in v.1.26

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3