[Bio] / Sprout / NewStuffCheck.pl Repository:
ViewVC logotype

Diff of /Sprout/NewStuffCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.15, Tue Sep 19 00:26:21 2006 UTC revision 1.16, Sun Oct 8 20:54:32 2006 UTC
# Line 3  Line 3 
3  =head1 New Stuff Checker  =head1 New Stuff Checker
4    
5  This script compares the genomes, features, and annotations in  This script compares the genomes, features, and annotations in
6  the old and new sprouts and lists the differences.  the old and new sprouts and lists the differences and produces
7    a report in HTML. The output is an HTML fragment, not an entire
8    web page. This is because we expect it to be included in another
9    page.
10    
11  The currently-supported command-line options for NewStuffCheck are as follows.  The currently-supported command-line options for NewStuffCheck are as follows.
12    
13  =over 4  =over 4
14    
 =item summary  
   
 Do not display details, only difference summaries.  
   
15  =item nofeats  =item nofeats
16    
17  Do not process features.  Do not process features.
# Line 52  Line 51 
51  Name of the group file (described below). The default is C<groups.tbl>  Name of the group file (described below). The default is C<groups.tbl>
52  in the Sprout data directory.  in the Sprout data directory.
53    
54    =item outFile
55    
56    Output file name. The default is C<html/includes/differences.inc> in the
57    nmpdr C<next> directory.
58    
59  =back  =back
60    
61  =head2 The Group File  =head2 The Group File
# Line 94  Line 98 
98  use FIG;  use FIG;
99  use SFXlate;  use SFXlate;
100  use Sprout;  use Sprout;
101    use CGI;
102    
103  # Get the command-line options and parameters.  # Get the command-line options and parameters.
104  my ($options, @parameters) = StandardSetup([qw(Sprout) ],  my ($options, @parameters) = StandardSetup([qw(Sprout) ],
105                                             {                                             {
106                                                groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"],                                                groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"],
107                                                nofeats => ["", "if specified, only genome changes will be displayed; otherwise, genome features will be compared and differences shown"],                                                nofeats => ["", "if specified, only genome changes will be displayed; otherwise, genome features will be compared and differences shown"],
108                                                trace => ["2-", "tracing level; use a minus to prevent tracing to standard output"],                                                trace => ["2", "tracing level; use a minus to prevent tracing to standard output"],
                                               summary => ["", "if specified, detailed lists of the different items will not be displayed"],  
109                                                phone => ["", "phone number (international format) to call when load finishes"],                                                phone => ["", "phone number (international format) to call when load finishes"],
110                                                  outFile => ["$FIG_Config::nmpdr_base/next/html/includes/diff.inc", "output file for the difference report"],
111                                             },                                             },
112                                             "",                                             "",
113                                             @ARGV);                                             @ARGV);
# Line 110  Line 115 
115  my $rtype;  my $rtype;
116  # Insure we catch errors.  # Insure we catch errors.
117  eval {  eval {
118        # Get a CGI object for building the output. We pass it the options hash so
119        # the formatting subroutines have access to it. Also, we want it to know
120        # we're not a real web script.
121        my $cgi = CGI->new($options);
122        # Start accumulating HTML data.
123        my @html = ();
124        # Open the output file. We do this early in case there's a problem.
125        my $outFileName = $options->{outFile};
126        Trace("Opening output file $outFileName.") if T(2);
127        Open(\*OUTPUT, ">$outFileName");
128        # Get a nice-looking version name and make it into a title.
129        my $version = uc $FIG_Config::nmpdr_version;
130        $version =~ tr/_/ /;
131        push @html, $cgi->h4({align => "center"}, "Difference Report for $version.");
132        # Start the table.
133        push @html, $cgi->start_table({align => "center", border => "2"});
134      # Get the group file.      # Get the group file.
135      my $groupFileName = $options->{groupFile};      my $groupFileName = $options->{groupFile};
136      Trace("Reading group file $groupFileName.") if T(2);      Trace("Reading group file $groupFileName.") if T(2);
137      # Well put each group's data into a hash, keyed by group      # We'll put each group's data into a hash, keyed by group
138      # name, each entry being a 3-tuple of page name, genus,      # name, each entry being a 3-tuple of page name, genus,
139      # and species      # and species
140      my %groups = Sprout::ReadGroupFile($groupFileName);      my %groups = Sprout::ReadGroupFile($groupFileName);
# Line 148  Line 169 
169          }          }
170      }      }
171      # Display the lists.      # Display the lists.
172      ShowLists(! $options->{summary},      push @html, ShowLists($cgi, 'New Genomes'     => $insertedGenomes,
               'New Genomes'     => $insertedGenomes,  
173                'Deleted Genomes' => $deletedGenomes);                'Deleted Genomes' => $deletedGenomes);
174      # Now the groups.      # Now the groups.
175      Trace("Comparing groups.") if T(2);      Trace("Comparing groups.") if T(2);
# Line 162  Line 182 
182          if (! exists $oldGroups{$newGroup}) {          if (! exists $oldGroups{$newGroup}) {
183              # Construct a list of this group's genes.              # Construct a list of this group's genes.
184              my @groupGenomes = NameGenomes($newSprout, $newGroups{$newGroup});              my @groupGenomes = NameGenomes($newSprout, $newGroups{$newGroup});
185              ShowLists(! $options->{summary}, "Genomes in new group $newGroup" => \@groupGenomes);              push @html, ShowLists($cgi, "Genomes in new group $newGroup" => \@groupGenomes);
186          } else {          } else {
187              # Here the group is in both versions. Fix the lists and compare them.              # Here the group is in both versions. Fix the lists and compare them.
188              my @newGroupList = NameGenomes($newSprout, $newGroups{$newGroup});              my @newGroupList = NameGenomes($newSprout, $newGroups{$newGroup});
# Line 170  Line 190 
190              Trace("Comparing lists for $newGroup.") if T(4);              Trace("Comparing lists for $newGroup.") if T(4);
191              my ($insertedGroupGenomes, $deletedGroupGenomes) = Tracer::CompareLists(\@newGroupList, \@oldGroupList);              my ($insertedGroupGenomes, $deletedGroupGenomes) = Tracer::CompareLists(\@newGroupList, \@oldGroupList);
192              Trace("Comparison complete.") if T(4);              Trace("Comparison complete.") if T(4);
193              # Delete the old group data. When we're done, this means we'll have a list of the deleted              # Delete the old group data. When we're done, this means the hash
194              # groups.              # will contain only the deleted groups.
195              delete $oldGroups{$newGroup};              delete $oldGroups{$newGroup};
196              # Show the lists. Empty lists will not be shown.              # Show the lists. Empty lists will not be shown.
197              Trace("Displaying group lists.") if T(4);              Trace("Displaying group lists.") if T(4);
198              ShowLists(! $options->{summary},              push @html, ShowLists($cgi, "Genomes new to $newGroup"       => $insertedGroupGenomes,
                       "Genomes new to $newGroup" => $insertedGroupGenomes,  
199                        "Genomes no longer in $newGroup" => $deletedGroupGenomes);                        "Genomes no longer in $newGroup" => $deletedGroupGenomes);
200          }          }
201      }      }
# Line 185  Line 204 
204      for my $oldGroup (sort keys %oldGroups) {      for my $oldGroup (sort keys %oldGroups) {
205          Trace("Processing deleted group $oldGroup.") if T(3);          Trace("Processing deleted group $oldGroup.") if T(3);
206          my @groupGenomes = NameGenomes($oldSprout, $oldGroups{$oldGroup});          my @groupGenomes = NameGenomes($oldSprout, $oldGroups{$oldGroup});
207          ShowLists(! $options->{summary}, "Genomes in deleted group $oldGroup" => \@groupGenomes);          push @html, ShowLists($cgi, "Genomes in deleted group $oldGroup" => \@groupGenomes);
208      }      }
209      # Next, we get the subsystems.      # Next, we get the subsystems.
210      Trace("Processing subsystems.") if T(2);      Trace("Processing subsystems.") if T(2);
# Line 204  Line 223 
223              }              }
224          }          }
225      }      }
226      ShowLists(! $options->{summary},      push @html, ShowLists($cgi, 'New Subsystems'     => $insertedSubs,
               'New Subsystems'     => $insertedSubs,  
227                'Deleted Subsystems' => $deletedSubs);                'Deleted Subsystems' => $deletedSubs);
228      # Next, we show some basic counts.      # Next, we show some basic counts.
229      print "\nStatistics for old Sprout\n\n";      push @html, ShowTitle($cgi, "Statistics for old Sprout");
230      ShowCounts($oldSprout);      push @html, ShowCounts($cgi, $oldSprout);
231      print "\nStatistics for new Sprout\n\n";      push @html, ShowTitle($cgi, "Statistics for new Sprout");
232      ShowCounts($newSprout);      push @html, ShowCounts($cgi, $newSprout);
233      # Now we show the genomes that are not in groups but could be. First, we convert      # Now we show the genomes that are not in groups but could be. First, we convert
234      # our group hash from the new Sprout into the form used on the web site.      # our group hash from the new Sprout into the form used on the web site.
235      Trace("Examining possible missing genomes in groups.") if T(2);      Trace("Examining possible missing genomes in groups.") if T(2);
# Line 244  Line 262 
262          my @leftOut = NameGenomes($newSprout, [ grep { ! exists $inGroup{$_} } @possibles ]);          my @leftOut = NameGenomes($newSprout, [ grep { ! exists $inGroup{$_} } @possibles ]);
263          # If anything survived, show the list.          # If anything survived, show the list.
264          if (@leftOut) {          if (@leftOut) {
265              ShowLists(! $options->{summary}, "Candidates for $group" => \@leftOut);              push @html, ShowLists($cgi, "Candidates for $group" => \@leftOut);
266          }          }
267      }      }
268      # Compare the property tables.      # Compare the property tables.
# Line 257  Line 275 
275      # Now get all the properties in the new Sprout without any features.      # Now get all the properties in the new Sprout without any features.
276      my @emptyProps = grep { $_->[1] == 0 } @newProps;      my @emptyProps = grep { $_->[1] == 0 } @newProps;
277      # Show what we've found.      # Show what we've found.
278      ShowLists(! $options->{summary}, 'New Properties' => $insertedProps,      push @html, ShowLists($cgi, 'New Properties'     => $insertedProps,
279                                       'Deleted Properties' => $deletedProps,                                       'Deleted Properties' => $deletedProps,
280                                       'Empty Properties' => \@emptyProps);                                       'Empty Properties' => \@emptyProps);
281      # Now we process the features of the common genes.      # Now we process the features of the common genes.
# Line 283  Line 301 
301                  # first anyway so the trace tells us what's happening.                  # first anyway so the trace tells us what's happening.
302                  if (scalar @{$insertedFeatures} + scalar @{$deletedFeatures} > 0) {                  if (scalar @{$insertedFeatures} + scalar @{$deletedFeatures} > 0) {
303                      Trace("Displaying feature differences.") if T(3);                      Trace("Displaying feature differences.") if T(3);
304                      ShowLists(! $options->{summary},                      push @html, ShowLists($cgi, "New Features for $genomeID"      => $insertedFeatures,
                               "New Features for $genomeID"      => $insertedFeatures,  
305                                "Features Deleted from $genomeID" => $deletedFeatures);                                "Features Deleted from $genomeID" => $deletedFeatures);
306                  }                  }
307              }              }
308          }          }
309      }      }
310        # Close the table.
311        push @html, $cgi->end_table();
312        # Assemble and write the HTML.
313        Trace("Writing to output.") if T(2);
314        print OUTPUT join("\n", @html, "");
315        # Close the output file.
316        close OUTPUT;
317        Trace("Analysis complete.") if T(2);
318  };  };
319  if ($@) {  if ($@) {
320      Trace("Script failed with error: $@") if T(0);      Trace("Script failed with error: $@") if T(0);
# Line 372  Line 397 
397      # Get the parameters.      # Get the parameters.
398      my ($sprout, $genomes) = @_;      my ($sprout, $genomes) = @_;
399      # Attach the names.      # Attach the names.
400      my @retVal = map { [$_, $sprout->GenusSpecies($_) ] } @{$genomes};      my @retVal = map { [$sprout->GenusSpecies($_), $_ ] } @{$genomes};
401      # Return the result.      # Return the result.
402      return @retVal;      return @retVal;
403  }  }
# Line 489  Line 514 
514    
515  =head3 ShowLists  =head3 ShowLists
516    
517  C<< ShowLists($all, %lists); >>  C<< my @htmlLines = ShowLists($cgi, %lists); >>
518    
519  Display a set of lists. Each list should consist of 2-tuples.  Display a set of lists. Each list should consist of 2-tuples, and the list
520    entries will be displayed as 2-element table rows with a header row.
521    
522  =over 4  =over 4
523    
524  =item all  =item cgi
525    
526  TRUE if details should be displayed; FALSE if only summaries should be displayed.  A CGI query object containing the options for this program. It is also used to format
527    HTML.
528    
529  =item lists  =item lists
530    
531  A hash mapping list names to list references.  A hash mapping list names to list references.
532    
533    =item RETURN
534    
535    Returns a list of HTML lines displaying the list in tabular form.
536    
537  =back  =back
538    
539  =cut  =cut
540    
541  sub ShowLists {  sub ShowLists {
542      # Get the parameters.      # Get the parameters.
543      my $all = shift @_;      my $cgi = shift @_;
544      my %lists = @_;      my %lists = @_;
545        # Declare the return variable. The HTML lines will be accumulated
546        # in here and then joined with new-lines.
547        my @retVal = ();
548      # Loop through the lists in alphabetical order by list name.      # Loop through the lists in alphabetical order by list name.
549      for my $listName (sort keys %lists) {      for my $listName (sort keys %lists) {
550          # Get the list itself.          # Get the list itself.
# Line 519  Line 553 
553          my $listSize = scalar @{$list};          my $listSize = scalar @{$list};
554          # Only proceed if the list is nonempty.          # Only proceed if the list is nonempty.
555          if ($listSize > 0) {          if ($listSize > 0) {
556              my $header = ShowHeader($listName, $listSize);              my $header = ComputeHeader($listName, $listSize);
             print "$header\n";  
557              Trace($header) if T(3);              Trace($header) if T(3);
558              # If we're at trace level 3, display the list.              # Display the header line as a header.
559              if ($all) {              push @retVal, ShowTitle($cgi, $header);
560                  # Put a spacer under the title.              # Now display the list as table rows.
                 print "\n";  
                 # Get the width of the name column.  
                 my $width = 0;  
                 for my $entryLen (map { length $_->[0] } @{$list}) {  
                     $width = $entryLen if $entryLen > $width;  
                 }  
                 # Now display the list.  
561                  for my $entry (@{$list}) {                  for my $entry (@{$list}) {
562                      my ($name, $data) = @{$entry};                      my ($name, $data) = @{$entry};
563                      print "    $name" . (" " x ($width - length $name)) . " $data\n";                  push @retVal, $cgi->Tr($cgi->td($name), $cgi->td({align => "right"}, $data));
                 }  
                 print "\n\n";  
564              }              }
565          }          }
566      }      }
567        # Return the list of HTML lines.
568        return @retVal;
569  }  }
570    
571  =head3 ShowHeader  =head3 ComputeHeader
572    
573  C<< my $header = ShowHeader($name, $count); >>  C<< my $header = ComputeHeader($name, $count); >>
574    
575  Return a list header for a list of the specified length.  Return a list header for a list of the specified length.
576    
# Line 566  Line 592 
592    
593  =cut  =cut
594    
595  sub ShowHeader {  sub ComputeHeader {
596      # Get the parameters.      # Get the parameters.
597      my ($name, $count) = @_;      my ($name, $count) = @_;
598      # Declare the return variable.      # Declare the return variable.
599      my $retVal;      my $retVal;
600      if ($count == 0) {      if ($count == 0) {
601          $retVal = "*** $name: none";          $retVal = "$name: none";
602      } elsif ($count == 1) {      } elsif ($count == 1) {
603          $retVal = "*** $name: one";          $retVal = "$name: one";
604      } else {      } else {
605          $retVal = "*** $name: $count";          $retVal = "$name: $count";
606      }      }
607      # Return the result.      # Return the result.
608      return $retVal;      return $retVal;
# Line 591  Line 617 
617    
618  =over 4  =over 4
619    
620    =item cgi
621    
622    CGI query object used to format the output.
623    
624  =item sprout  =item sprout
625    
626  Sprout instance for which counts are to be produced.  Sprout instance for which counts are to be produced.
627    
628    =item RETURN
629    
630    Returns a list of HTML lines with the counts arranged in table rows.
631    
632  =back  =back
633    
634  =cut  =cut
635    
636  sub ShowCounts {  sub ShowCounts {
637      # Get the parameters.      # Get the parameters.
638      my ($sprout) = @_;      my ($cgi, $sprout) = @_;
639      # Count genomes and subsystems.      # Count genomes and subsystems.
640      my $genomes = $sprout->GetCount(['Genome']);      my $genomes = $sprout->GetCount(['Genome']);
641      my $subsystems = $sprout->GetCount(['Subsystem']);      my $subsystems = $sprout->GetCount(['Subsystem']);
# Line 609  Line 643 
643      my $roles = $sprout->GetCount(['OccursInSubsystem']);      my $roles = $sprout->GetCount(['OccursInSubsystem']);
644      my $funcs = $sprout->GetCount(['ExternalAliasFunc']);      my $funcs = $sprout->GetCount(['ExternalAliasFunc']);
645      my $couples = $sprout->GetCount(['Coupling']);      my $couples = $sprout->GetCount(['Coupling']);
646      # Count features and BBHs.      # Count features.
647      my $features = $sprout->GetCount(['Feature']);      my $features = $sprout->GetCount(['Feature']);
648      # Display the counts.      # Display the counts.
649      print "Genomes = $genomes.\n";      my @retVal = ();
650      print "Subsystems = $subsystems.\n";      push @retVal, $cgi->Tr($cgi->td("Genomes"), $cgi->td({ align => "right" }, $genomes));
651      print "Roles = $roles.\n";      push @retVal, $cgi->Tr($cgi->td("Subsystems"), $cgi->td({ align => "right" }, $subsystems));
652      print "External function assignments = $funcs.\n";      push @retVal, $cgi->Tr($cgi->td("Roles"), $cgi->td({ align => "right" }, $roles));
653      print "Features = $features.\n";      push @retVal, $cgi->Tr($cgi->td("External function assignments"), $cgi->td({ align => "right" }, $funcs));
654      print "Functional couplings = $couples.\n";      push @retVal, $cgi->Tr($cgi->td("Features"), $cgi->td({ align => "right" }, $features));
655      print "\n";      push @retVal, $cgi->Tr($cgi->td("Functional Coupling"), $cgi->td({ align => "right" }, $couples));
656        # Return the html.
657        return @retVal;
658    }
659    
660    =head3 ShowTitle
661    
662    C<< my $html = ShowTitle($cgi, $title); >>
663    
664    Display a title line. This will be a merged table row with bolded text.
665    
666    =over 4
667    
668    =item cgi
669    
670    CGI query object used to generate HTML output.
671    
672    =item RETURN
673    
674    Returns the HTML text.
675    
676    =back
677    
678    =cut
679    
680    sub ShowTitle {
681        # Get the parameters.
682        my ($cgi, $title) = @_;
683        # Declare the return variable.
684        my $retVal = $cgi->Tr($cgi->th({colspan => 2, align => "center"}, $title));
685        # Return the result.
686        return $retVal;
687  }  }
688    
689  1;  1;

Legend:
Removed from v.1.15  
changed lines
  Added in v.1.16

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3