[Bio] / Sprout / GenomeStats.pl Repository:
ViewVC logotype

Diff of /Sprout/GenomeStats.pl

Parent Directory | Revision Log | View Patch

revision 1.26, Sun Oct 8 05:44:55 2006 UTC | revision 1.31, Tue Feb 5 05:47:32 2008 UTC
# Line 6  Line 6 
6  the genomes in each of the genome groups. Genomes that are new to this version  the genomes in each of the genome groups. Genomes that are new to this version
7  of the Sprout will be specially marked. In order for this to work, both the  of the Sprout will be specially marked. In order for this to work, both the
8  current and previous Sprout databases must be available on this machine.  current and previous Sprout databases must be available on this machine.
 There is one positional parameter: the name of a directory in which to place  
 the include files.  
9    
10  The currently-supported command-line options are as follows.  The currently-supported command-line options are as follows.
11    
# Line 86  Line 84 
84    
85  use strict;  use strict;
86  use Tracer;  use Tracer;
 use DocUtils;  
 use TestUtils;  
87  use Cwd;  use Cwd;
88  use File::Copy;  use File::Copy;
89  use File::Path;  use File::Path;
# Line 95  Line 91 
91  use SFXlate;  use SFXlate;
92  use CGI qw(:standard);  use CGI qw(:standard);
93  use FIG;  use FIG;
 no warnings 'once'; # only when coding  
94    
95  # Get the command-line options and parameters.  # Get the command-line options and parameters.
96  my ($options, @parameters) = StandardSetup([qw(Sprout ERDB) ],  my ($options, @parameters) = StandardSetup([qw(Sprout ERDB) ],
# Line 110  Line 105 
105                                              counterStyle => ['countercell', 'style for cells with counter values'],                                              counterStyle => ['countercell', 'style for cells with counter values'],
106                                              linkCGI => ['../FIG/genome_statistics.cgi',                                              linkCGI => ['../FIG/genome_statistics.cgi',
107                                                          'path to CGI script for detailed statistics'],                                                          'path to CGI script for detailed statistics'],
                                             groupFile => ["$FIG_Config::sproutData/groups.tbl",  
                                                           'location of the NMPDR group description file'],  
108                                              noNewCheck => [0, 'if specified, skips the check for new genomes'],                                              noNewCheck => [0, 'if specified, skips the check for new genomes'],
109                                              targetDir => ["$FIG_Config::nmpdr_base/next/html/includes",                                              targetDir => ["$FIG_Config::nmpdr_base/next/html/includes",
110                                                            'target directory'],                                                            'target directory'],
111                                              },                                              },
112                                             "",                                             "",
113                                             @ARGV);                                             @ARGV);
114    # The return type (error/no error) goes in here.
115    my $rtype;
116    eval {
117        # This table controls the special attribute columns. For each we need to know the attribute name and the
118        # column title. If any genomes in a group have a value for one of the special columns, that column is
119        # displayed along with the attribute values.
120        my %specialCols = (Serotype => 'Serotype_code',
121                           Phenotype => 'Phenotype');
122  # Verify the directory name.  # Verify the directory name.
123  my $targetDir = $options->{targetDir};  my $targetDir = $options->{targetDir};
124  if (! $targetDir) {  if (! $targetDir) {
# Line 130  Line 131 
131      my %newGroupHash = $sprout->GetGroups();      my %newGroupHash = $sprout->GetGroups();
132      # Extract the genome group data from the new Sprout.      # Extract the genome group data from the new Sprout.
133      if (! $options->{strict}) {      if (! $options->{strict}) {
134          %newGroupHash = Sprout::Fix(%newGroupHash);              %newGroupHash = $sprout->Fix(%newGroupHash);
135      }      }
136      # This hash will be used to determine which genomes are new.      # This hash will be used to determine which genomes are new.
137      my %oldGroupHash = ();      my %oldGroupHash = ();
# Line 140  Line 141 
141          %oldGroupHash = map { $_ => $newGroupHash{$_} } keys %newGroupHash;          %oldGroupHash = map { $_ => $newGroupHash{$_} } keys %newGroupHash;
142      } else {      } else {
143          # Get the old Sprout.          # Get the old Sprout.
144          my $oldSprout = SFXlate->new_sprout_only($FIG_Config::oldSproutDB);              my $oldSprout = SFXlate->old_sprout_only();
145          # Extract the genome group data from the old Sprout.          # Extract the genome group data from the old Sprout.
146          %oldGroupHash = $oldSprout->GetGroups();          %oldGroupHash = $oldSprout->GetGroups();
147          if (! $options->{strict}) {          if (! $options->{strict}) {
148              %oldGroupHash = Sprout::Fix(%oldGroupHash);                  %oldGroupHash = $oldSprout->Fix(%oldGroupHash);
149          }          }
150      }      }
151      # Read the group file.          # Get a FIG object for computing attributes.
152      my %groupData = Sprout::ReadGroupFile($options->{groupFile});          my $fig = FIG->new();
153            # Get the super-group list.
154            my @superGroups = sort keys %newGroupHash;
155      # Set up some useful stuff for the four count columns.      # Set up some useful stuff for the four count columns.
156      my %linkParms = ( s0 => "nothypo_sub", n0 => "nothypo_nosub",      my %linkParms = ( s0 => "nothypo_sub", n0 => "nothypo_nosub",
157                        s1 => "hypo_sub", n1 => "hypo_nosub" );                        s1 => "hypo_sub", n1 => "hypo_nosub" );
# Line 160  Line 163 
163      # Prepare a hash for the summary counters. These will be used on the organism summary page.      # Prepare a hash for the summary counters. These will be used on the organism summary page.
164      my %summaries = ();      my %summaries = ();
165      # Loop through the groups.      # Loop through the groups.
166      for my $groupID (keys %newGroupHash) {          for my $groupID (@superGroups) {
167          Trace("Processing group $groupID.") if T(2);          Trace("Processing group $groupID.") if T(2);
168          # Create a hash for summarizing the counters.          # Create a hash for summarizing the counters.
169          my %groupTotals = ( genomes => 0, pegs => 0, RNAs => 0,          my %groupTotals = ( genomes => 0, pegs => 0, RNAs => 0,
# Line 176  Line 179 
179          # Create the output file.          # Create the output file.
180          my $outFileName = "stats-" . lc($groupID) . ".inc";          my $outFileName = "stats-" . lc($groupID) . ".inc";
181          Open(\*GROUPFILE, ">$targetDir/$outFileName");          Open(\*GROUPFILE, ">$targetDir/$outFileName");
182          # Start the table.              # Get the special columns. We'll stuff them in a hash keyed by column name. Each column name will contain
183          print GROUPFILE "<table class=\"$tableStyle\">\n";              # a sub-hash that translates each genome ID to its applicable attribute value (if any).
184          # Create the header row.              my %specialData = ();
185          print GROUPFILE Tr( { class => 'odd' }, th(["Strain annotated in NMPDR",              for my $specialColumn (keys %specialCols) {
186                                                   "Genome size, bp",                  # Get the attribute mapping.
187                    my %specialDataList = map { $_->[0] => $_->[2] } $fig->get_attributes(\@newGenomes, $specialCols{$specialColumn});
188                    # We only proceed if some attributes were found. As a result, the keys in %specialData will only be keys
189                    # for columns that exist in the output.
190                    if (scalar(keys %specialDataList)) {
191                        $specialData{$specialColumn} = \%specialDataList;
192                    }
193                }
194                # Set up the column names.
195                my @columnNames = "Strain annotated in NMPDR";
196                push @columnNames, sort keys %specialData;
197                push @columnNames,  "Genome size, bp",
198                                                   "Protein Encoding Genes (PEGs)",                                                   "Protein Encoding Genes (PEGs)",
199                                                   "Named genes in subsystems",            # s0                                                   "Named genes in subsystems",            # s0
200                                                   "Named genes not in subsystems",        # n0                                                   "Named genes not in subsystems",        # n0
201                                                   "Hypothetical genes in subsystems",     # s1                                                   "Hypothetical genes in subsystems",     # s1
202                                                   "Hypothetical genes not in subsystems", # n1                                                   "Hypothetical genes not in subsystems", # n1
203                                                   "Subsystems",                                                   "Subsystems",
204                                                   "RNAs",                                  "RNAs";
205                                                     ])) . "\n";              # Start the table.
206                print GROUPFILE "<table class=\"$tableStyle\">\n";
207                # Create the header row.
208                print GROUPFILE Tr( { class => 'odd' }, th(\@columnNames)) . "\n";
209          # The data rows will be built next. We'll be putting them into a hash keyed by          # The data rows will be built next. We'll be putting them into a hash keyed by
210          # organism name. The hash enables us to spit them out sorted by name.          # organism name. The hash enables us to spit them out sorted by name.
211          my %rows = ();          my %rows = ();
# Line 203  Line 220 
220              Trace("Processing ${new}genome $genomeID for $groupID.") if T(3);              Trace("Processing ${new}genome $genomeID for $groupID.") if T(3);
221              # Get the strain name.              # Get the strain name.
222              my $genomeName = $sprout->GenusSpecies($genomeID);              my $genomeName = $sprout->GenusSpecies($genomeID);
223                    # Apply a link.
224                    my $genomeText = CGI::a({ href => "../FIG/genome_statistics.cgi?genome=$genomeID;SPROUT=1" }, $genomeName);
225              # If this is a new strain, build the HTML for the NEW! mark.              # If this is a new strain, build the HTML for the NEW! mark.
226              if ($new) {              if ($new) {
227                  $new = " <span class=\"$markerStyle\">NEW!</span>";                  $new = " <span class=\"$markerStyle\">NEW!</span>";
# Line 265  Line 284 
284                                                 [$genomeID]);                                                 [$genomeID]);
285              my $ssLink = "$options->{linkCGI}?user=\&genome=$genomeID&SPROUT=1&show_subsystems=1";              my $ssLink = "$options->{linkCGI}?user=\&genome=$genomeID&SPROUT=1&show_subsystems=1";
286              my $ssCol = "<a href=\"$ssLink\">$ssCount</a>";              my $ssCol = "<a href=\"$ssLink\">$ssCount</a>";
287              # Create the row text. Note that we use the distributive capability of the TD                  # Start creating the table cells.
288              # function to apply the same style to each one.                  my $rowHtml = td("$genomeText$new");
289              my $rowHtml = join("",                  # Add any special columns.
290                                 td("$genomeName$new"),                  for my $specialCol (keys %specialData) {
291                        # Here we get the attribute value. If there is none, we leave the column blank.
292                        my $attribute = $specialData{$specialCol}->{$genomeID} || "&nbsp;";
293                        $rowHtml .= td($attribute);
294                    }
295                    # Now add the data columns.
296                    $rowHtml .= join("",
297                                 td({ class => $numStyle }, $genomeLen),                                 td({ class => $numStyle }, $genomeLen),
298                                 td({ class => $numStyle }, $pegCount),                                 td({ class => $numStyle }, $pegCount),
299                                 td({ class => $counterStyle }, \@counterValues),                                 td({ class => $counterStyle }, \@counterValues),
# Line 321  Line 346 
346      for my $groupName (sort keys %summaries) {      for my $groupName (sort keys %summaries) {
347          my $group = $summaries{$groupName};          my $group = $summaries{$groupName};
348          # Compute the link for the current group.          # Compute the link for the current group.
349          my $groupLink = a({ href => $groupData{$groupName}->[0] }, $groupName);              my $groupLink = a({ href => $sprout->GroupPageName($groupName) }, $groupName);
350          # Create the table row.          # Create the table row.
351          my $rowHtml = join("",          my $rowHtml = join("",
352                             td($groupLink),                             td($groupLink),
# Line 340  Line 365 
365      # We're all done.      # We're all done.
366      Trace("Processing complete.") if T(2);      Trace("Processing complete.") if T(2);
367  }  }
368    };
369    if ($@) {
370        Trace("Stats failed with error: $@") if T(0);
371        $rtype = "error";
372    } else {
373        Trace("Stats complete.") if T(2);
374        $rtype = "no error";
375    }
376    if ($options->{phone}) {
377        my $msgID = Tracer::SendSMS($options->{phone}, "GenomeStats terminated with $rtype.");
378        if ($msgID) {
379            Trace("Phone message sent with ID $msgID.") if T(2);
380        } else {
381            Trace("Phone message not sent.") if T(2);
382        }
383    }
384    
385  1;  1;

Legend:
Removed from v.1.26  
changed lines
  Added in v.1.31

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3