[Bio] / Sprout / NewStuffCheck.pl Repository:
ViewVC logotype

Diff of /Sprout/NewStuffCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.22, Tue Apr 10 06:04:34 2007 UTC revision 1.23, Mon Jul 16 20:05:23 2007 UTC
# Line 12  Line 12 
12    
13  =over 4  =over 4
14    
 =item nofeats  
   
 Do not process features.  
   
15  =item user  =item user
16    
17  Name suffix to be used for log files. If omitted, the PID is used.  Name suffix to be used for log files. If omitted, the PID is used.
# Line 56  Line 52 
52  Output file name. The default is C<html/includes/diff.inc> in the  Output file name. The default is C<html/includes/diff.inc> in the
53  nmpdr C<next> directory.  nmpdr C<next> directory.
54    
55    =item orgFile
56    
57    Output file for the genome report. The default is C<html/includes/genomes.inc> in
58    the nmpdr C<next> directory.
59    
60  =back  =back
61    
62  =head2 The Group File  =head2 The Group File
# Line 105  Line 106 
106  my ($options, @parameters) = StandardSetup([qw(Sprout) ],  my ($options, @parameters) = StandardSetup([qw(Sprout) ],
107                                             {                                             {
108                                                groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"],                                                groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"],
                                               nofeats => ["", "if specified, only genome changes will be displayed; otherwise, genome features will be compared and differences shown"],  
109                                                trace => ["2", "tracing level; use a minus to prevent tracing to standard output"],                                                trace => ["2", "tracing level; use a minus to prevent tracing to standard output"],
110                                                phone => ["", "phone number (international format) to call when load finishes"],                                                phone => ["", "phone number (international format) to call when load finishes"],
111                                                outFile => ["$FIG_Config::nmpdr_base/next/html/includes/diff.inc", "output file for the difference report"],                                                outFile => ["$FIG_Config::nmpdr_base/next/html/includes/diff.inc", "output file for the difference report"],
112                                                  orgFile => ["$FIG_Config::nmpdr_base/next/html/includes/genomes.inc", "output file for the genome report"],
113                                             },                                             },
114                                             "",                                             "",
115                                             @ARGV);                                             @ARGV);
# Line 129  Line 130 
130      # Get a nice-looking version name and make it into a title.      # Get a nice-looking version name and make it into a title.
131      my $version = uc $FIG_Config::nmpdr_version;      my $version = uc $FIG_Config::nmpdr_version;
132      $version =~ tr/_/ /;      $version =~ tr/_/ /;
133      push @html, $cgi->h4({align => "center"}, "Difference Report for $version.");      push @html, $cgi->h4({align => "center"}, "Difference Report for $version");
134      # Start the table.      # Start the table.
135      push @html, $cgi->start_table({align => "center", border => "2"});      push @html, $cgi->start_table({align => "center", border => "2"});
136      # Get the group file.      # Get the group file.
# Line 229  Line 230 
230      }      }
231      push @html, ShowLists($cgi, 'New Subsystems'     => $insertedSubs,      push @html, ShowLists($cgi, 'New Subsystems'     => $insertedSubs,
232                                  'Deleted Subsystems' => $deletedSubs);                                  'Deleted Subsystems' => $deletedSubs);
233        # Print what we've done so far.
234        FlushData(\*OUTPUT, \@html);
235        # Now we need to process some statistics that require looping through all the
236        # features in the new sprout. While we're at it, we'll collect the BBH and
237        # coupling counts.
238        my $bbhCount = 0;
239        my $couplingCount = 0;
240        # One of the reports is only for genomes common to both sprouts. To help us
241        # make this determination, we get a hash of the inserted genomes.
242        my %skipGenomes = map { $_->[0] => 1 } @{$insertedGenomes};
243        # Open the organism report file.
244        Open(\*ORGOUT, ">$options->{orgFile}");
245        # Start the table.
246        my @orgHtml = ();
247        push @orgHtml, $cgi->h4("Genome Report for $version");
248        push @orgHtml, $cgi->start_table({ border => 2, align => 'center'});
249        push @orgHtml, $cgi->Tr($cgi->th("Genome"),
250                                $cgi->th({align => 'right'}, ["Size (bp)", "Feats", "Contigs", "Subs",
251                                                              "F In SS", "PEGs", "RNAs", "PPs", "New", "Del"]));
252        FlushData(\*ORGOUT, \@orgHtml);
253        # Now we start the loop.
254        for my $genomeData (@newGenomes) {
255            # Get this genome's ID and name.
256            my $genomeID = $genomeData->[0];
257            # Create a title for it.
258            my $genomeTitle = "$genomeID: $genomeData->[1]";
259            # Compute its size.
260            my $genomeSize = $newSprout->GenomeLength($genomeID);
261            # Get a list of the genomes that are not this one.
262            my @otherGenomes = grep { $_ ne $genomeID } map { $_->[0] } @newGenomes;
263            Trace("Computing BBH matrix for $genomeID.") if T(3);
264            # Get the bbh matrix going from the current gene to the others.
265            my %matrix = $newSprout->BBHMatrix($genomeID, 1e-20, @otherGenomes);
266            # Set up the subsystem hash. This will be used to track which subsystems are used by
267            # the genome's features.
268            my %subHash = ();
269            # Set up the contig hash. This will be used to track which contigs are used by the
270            # genome's features.
271            my %contigHash = ();
272            # Set up a counter hash for feature types.
273            my %counters = (peg => 0, in_ss => 0, rna => 0, pp => 0, total => 0);
274            # We'll store information about the genome's features (ID and functional role) in here.
275            my @newFeatures = ();
276            # Loop through the genome's features. The order is important here, because we need
277            # to match the order used by "GetFeatures" for the feature difference comparison.
278            Trace("Processing feature statistics for $genomeID.") if T(3);
279            my $fquery = $newSprout->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
280                                         [$genomeID]);
281            # Loop through the features, updating the counts.
282            while (my $feature = $fquery->Fetch()) {
283                # Update the total feature count.
284                $counters{total}++;
285                Trace("$counters{total} features processed for $genomeID.") if T(3) && ($counters{total} % 500 == 0);
286                # Get the feature ID and role.
287                my $fid = $feature->PrimaryValue('Feature(id)');
288                push @newFeatures, [$fid, $feature->PrimaryValue('Feature(assignment)')];
289                # Check to see if we have BBH data.
290                if (exists $matrix{$fid}) {
291                    $bbhCount += scalar keys %{$matrix{$fid}};
292                }
293                # Ask for couplings.
294                my %coupleHash = $newSprout->CoupledFeatures($fid);
295                $couplingCount += keys %coupleHash;
296                # See if this feature is in a subsystem.
297                my %subs = $newSprout->SubsystemsOf($fid);
298                if (keys %subs) {
299                    $counters{in_ss}++;
300                    for my $sub (keys %subs) {
301                        $subHash{$sub} = 1;
302                    }
303                }
304                # Increment the feature type counter.
305                $counters{$feature->PrimaryValue('Feature(feature-type)')}++;
306                # Insure we've tracked this feature's contigs.
307                my @locations = split /\s*,\s*/, $feature->PrimaryValue('Feature(location-string)');
308                for my $loc (@locations) {
309                    my $locObject = BasicLocation->new($loc);
310                    $contigHash{$locObject->Contig} = 1;
311                }
312            }
313            Trace("Feature data compiled for $genomeID.") if T(3);
314            # The last thing we need to do is compute the number of features added or deleted.
315            # This goes in the genome report, but it's only meaningful for common genomes.
316            my ($addCount, $delCount) = ("","");
317            if (! $skipGenomes{$genomeID}) {
318                # Get the old features.
319                my @oldFeatures = GetFeatures($oldSprout, $genomeID);
320                Trace("Comparing features for $genomeID.") if T(3);
321                # Compare the lists.
322                my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);
323                $addCount = scalar(@{$insertedFeatures});
324                $delCount = scalar(@{$deletedFeatures});
325            }
326            push @orgHtml, $cgi->Tr($cgi->td($genomeTitle),
327                                    $cgi->td({align => 'right'}, [$genomeSize, $counters{total}, scalar(keys %contigHash),
328                                                                  scalar(keys %subHash), $counters{in_ss}, $counters{peg},
329                                                                  $counters{rna}, $counters{pp}, $addCount, $delCount]));
330            FlushData(\*ORGOUT, \@orgHtml);
331        }
332        # Close the table for the genome report.
333        push @orgHtml, $cgi->end_table();
334        FlushData(\*ORGOUT, \@orgHtml);
335        close ORGOUT;
336        # Flush the genome feature comparison data.
337        FlushData(\*OUTPUT, \@html);
338      # Next, we show some basic counts.      # Next, we show some basic counts.
339        Trace("Displaying counts.") if T(3);
340      push @html, ShowTitle($cgi, "Statistics for old Sprout");      push @html, ShowTitle($cgi, "Statistics for old Sprout");
341      push @html, ShowCounts($cgi, $oldSprout);      push @html, ShowCounts($cgi, $oldSprout);
342      push @html, ShowTitle($cgi, "Statistics for new Sprout");      push @html, ShowTitle($cgi, "Statistics for new Sprout");
343      push @html, ShowCounts($cgi, $newSprout);      push @html, ShowCounts($cgi, $newSprout);
344        push @html, ShowDatum($cgi, BBHs => $bbhCount);
345        push @html, ShowDatum($cgi, "Functional Couplings", $couplingCount);
346        FlushData(\*OUTPUT, \@html);
347      # Now we show the genomes that are not in groups but could be. First, we convert      # Now we show the genomes that are not in groups but could be. First, we convert
348      # our group hash from the new Sprout into the form used on the web site.      # our group hash from the new Sprout into the form used on the web site.
349      Trace("Examining possible missing genomes in groups.") if T(2);      Trace("Examining possible missing genomes in groups.") if T(2);
# Line 269  Line 379 
379              push @html, ShowLists($cgi, "Candidates for $group" => \@leftOut);              push @html, ShowLists($cgi, "Candidates for $group" => \@leftOut);
380          }          }
381      }      }
382      # Now we process the features of the common genes.      FlushData(\*OUTPUT, \@html);
     if (! $options->{nofeats}) {  
         # First we need a hash of the inserted stuff so we know to skip it.  
         my %skipGenomes = map { $_->[0] => 1 } @{$insertedGenomes};  
         # Loop through the genomees.  
         for my $genome (@newGenomes) {  
             # Get the ID and name.  
             my ($genomeID, $genomeName) = @{$genome};  
             Trace("Processing $genomeID.") if T(3);  
             # Only process the common genes.  
             if (! $skipGenomes{$genomeID}) {  
                 # Compare the genome group information.  
                 # Get the new and old features. This will be very stressful to the machine,  
                 # because there are lots of features.  
                 my @oldFeatures = GetFeatures($oldSprout, $genomeID);  
                 my @newFeatures = GetFeatures($newSprout, $genomeID);  
                 Trace("Comparing features for $genomeID.") if T(3);  
                 # Compare the lists.  
                 my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);  
                 # Display the lists. Only nonempty lists are displayed; however, we do a check  
                 # first anyway so the trace tells us what's happening.  
                 if (scalar @{$insertedFeatures} + scalar @{$deletedFeatures} > 0) {  
                     Trace("Displaying feature differences.") if T(3);  
                     push @html, ShowLists($cgi, "New Features for $genomeID"      => $insertedFeatures,  
                                                 "Features Deleted from $genomeID" => $deletedFeatures);  
                 }  
             }  
         }  
     }  
383      # Close the table.      # Close the table.
384      push @html, $cgi->end_table();      push @html, $cgi->end_table();
385      # Assemble and write the HTML.      # Flush the last of the HTML.
386      Trace("Writing to output.") if T(2);      FlushData(\*OUTPUT, \@html);
     print OUTPUT join("\n", @html, "");  
387      # Close the output file.      # Close the output file.
388      close OUTPUT;      close OUTPUT;
389      Trace("Analysis complete.") if T(2);      Trace("Analysis complete.") if T(2);
# Line 323  Line 404 
404      }      }
405  }  }
406    
407    =head3 FlushData
408    
409    C<< FlushData($handle, \@lines); >>
410    
411    Write the specified lines to the output file and clear them out of the list. This
412    method is called periodically so that even if something goes wrong we can still
413    see the data accumulating in the output file.
414    
415    =over 4
416    
417    =item handle
418    
419    Output handle to which the lines should be written.
420    
421    =item lines
422    
423    Reference to a list of output lines. The output lines will be written to the output
424    handle and then removed from the list.
425    
426    =back
427    
428    =cut
429    
430    sub FlushData {
431        # Get the parameters.
432        my ($handle, $lines) = @_;
433        Trace("Flushing " . scalar(@{$lines}) . " lines to output file.") if T(3);
434        # Write the lines.
435        print $handle join("\n", @{$lines});
436        # Write a terminating new-line.
437        print $handle "\n";
438        # Clear the list.
439        splice @{$lines};
440    }
441    
442  =head3 GetGenomes  =head3 GetGenomes
443    
444  C<< my @geneList = GetGenomes($sprout); >>  C<< my @geneList = GetGenomes($sprout); >>
# Line 557  Line 673 
673              for my $entry (@{$list}) {              for my $entry (@{$list}) {
674                  my ($name, $data) = @{$entry};                  my ($name, $data) = @{$entry};
675                  $name =~ tr/_/ /;                  $name =~ tr/_/ /;
676                  push @retVal, $cgi->Tr($cgi->td($name), $cgi->td({align => "right"}, $data));                  push @retVal, ShowDatum($cgi, $name => $data);
677              }              }
678          }          }
679      }      }
# Line 636  Line 752 
752      # Count genomes and subsystems.      # Count genomes and subsystems.
753      my $genomes = $sprout->GetCount(['Genome']);      my $genomes = $sprout->GetCount(['Genome']);
754      my $subsystems = $sprout->GetCount(['Subsystem']);      my $subsystems = $sprout->GetCount(['Subsystem']);
755      # Count roles, external functional assignments, and functional couplings.      # Count roles and external functional assignments.
756      my $roles = $sprout->GetCount(['OccursInSubsystem']);      my $roles = $sprout->GetCount(['OccursInSubsystem']);
757      my $funcs = $sprout->GetCount(['ExternalAliasFunc']);      my $funcs = $sprout->GetCount(['ExternalAliasFunc']);
     my $couples = $sprout->GetCount(['Coupling']);  
758      # Count features.      # Count features.
759      my $features = $sprout->GetCount(['Feature']);      my $features = $sprout->GetCount(['Feature']);
760      # Display the counts.      # Display the counts.
761      my @retVal = ();      my @retVal = ();
762      push @retVal, $cgi->Tr($cgi->td("Genomes"), $cgi->td({ align => "right" }, $genomes));      push @retVal, ShowDatum($cgi, Genomes => $genomes);
763      push @retVal, $cgi->Tr($cgi->td("Subsystems"), $cgi->td({ align => "right" }, $subsystems));      push @retVal, ShowDatum($cgi, Subsystems => $subsystems);
764      push @retVal, $cgi->Tr($cgi->td("Roles"), $cgi->td({ align => "right" }, $roles));      push @retVal, ShowDatum($cgi, Roles => $roles);
765      push @retVal, $cgi->Tr($cgi->td("External function assignments"), $cgi->td({ align => "right" }, $funcs));      push @retVal, ShowDatum($cgi, 'External function assignments', $funcs);
766      push @retVal, $cgi->Tr($cgi->td("Features"), $cgi->td({ align => "right" }, $features));      push @retVal, ShowDatum($cgi, Features => $features);
     push @retVal, $cgi->Tr($cgi->td("Functional Coupling"), $cgi->td({ align => "right" }, $couples));  
767      # Return the html.      # Return the html.
768      return @retVal;      return @retVal;
769  }  }
770    
771    =head3 ShowDatum
772    
773    C<< my $htmlText = ShowDatum($cgi, $label, $value); >>
774    
775    Return a table row displaying the specified label and value.
776    
777    =over 4
778    
779    =item cgi
780    
781    CGI query object used to generate the HTML text.
782    
783    =item label
784    
785    Label to appear in the left cell of the table row.
786    
787    =item value
788    
789    Value to appear in the right cell of the table row.
790    
791    =item RETURN
792    
793    Returns the HTML for a single table row with the last cell right-aligned.
794    
795    =back
796    
797    =cut
798    
799    sub ShowDatum {
800        # Get the parameters.
801        my ($cgi, $label, $value) = @_;
802        # Create the table row.
803        my $retVal = $cgi->Tr($cgi->td($label), $cgi->td({align => 'right'}, $value));
804        # Return it.
805        return $retVal;
806    }
807    
808  =head3 ShowTitle  =head3 ShowTitle
809    
810  C<< my $html = ShowTitle($cgi, $title); >>  C<< my $html = ShowTitle($cgi, $title); >>

Legend:
Removed from v.1.22  
changed lines
  Added in v.1.23

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3