[Bio] / Sprout / GenomeStats.pl Repository:
ViewVC logotype

Diff of /Sprout/GenomeStats.pl

Parent Directory | Revision Log | View Patch

revision 1.25, Wed Oct 4 16:02:41 2006 UTC | revision 1.28, Tue Apr 10 06:01:56 2007 UTC
# Line 6  Line 6 
6  the genomes in each of the genome groups. Genomes that are new to this version  the genomes in each of the genome groups. Genomes that are new to this version
7  of the Sprout will be specially marked. In order for this to work, both the  of the Sprout will be specially marked. In order for this to work, both the
8  current and previous Sprout databases must be available on this machine.  current and previous Sprout databases must be available on this machine.
 There is one positional parameter: the name of a directory in which to place  
 the include files.  
9    
10  The currently-supported command-line options are as follows.  The currently-supported command-line options are as follows.
11    
# Line 111  Line 109 
109                                              linkCGI => ['../FIG/genome_statistics.cgi',                                              linkCGI => ['../FIG/genome_statistics.cgi',
110                                                          'path to CGI script for detailed statistics'],                                                          'path to CGI script for detailed statistics'],
111                                              groupFile => ["$FIG_Config::sproutData/groups.tbl",                                              groupFile => ["$FIG_Config::sproutData/groups.tbl",
112                                                            "location of the NMPDR group description file"],                                                            'location of the NMPDR group description file'],
113                                              noNewCheck => [0, 'if specified, skips the check for new genomes'],                                              noNewCheck => [0, 'if specified, skips the check for new genomes'],
114                                                targetDir => ["$FIG_Config::nmpdr_base/next/html/includes",
115                                                              'target directory'],
116                                              },                                              },
117                                             "<targetDir>",                                             "",
118                                             @ARGV);                                             @ARGV);
119  # Verify the directory name.  # Verify the directory name.
120  my $targetDir = $parameters[0];  my $targetDir = $options->{targetDir};
121  if (! $targetDir) {  if (! $targetDir) {
122      Confess("No target directory specified.");      Confess("No target directory specified.");
123  } elsif (! -d $targetDir) {  } elsif (! -d $targetDir) {
# Line 138  Line 138 
138          %oldGroupHash = map { $_ => $newGroupHash{$_} } keys %newGroupHash;          %oldGroupHash = map { $_ => $newGroupHash{$_} } keys %newGroupHash;
139      } else {      } else {
140          # Get the old Sprout.          # Get the old Sprout.
141          my $oldSprout = SFXlate->new_sprout_only($FIG_Config::oldSproutDB);          my $oldSprout = SFXlate->old_sprout_only();
142          # Extract the genome group data from the old Sprout.          # Extract the genome group data from the old Sprout.
143          my %oldGroupHash = $oldSprout->GetGroups();          %oldGroupHash = $oldSprout->GetGroups();
144          if (! $options->{strict}) {          if (! $options->{strict}) {
145              %oldGroupHash = Sprout::Fix(%oldGroupHash);              %oldGroupHash = Sprout::Fix(%oldGroupHash);
146          }          }
147      }      }
148        # Get a FIG object for computing attributes.
149        my $fig = FIG->new();
150      # Read the group file.      # Read the group file.
151      my %groupData = Sprout::ReadGroupFile($options->{groupFile});      my %groupData = Sprout::ReadGroupFile($options->{groupFile});
152      # Set up some useful stuff for the four count columns.      # Set up some useful stuff for the four count columns.
# Line 174  Line 176 
176          # Create the output file.          # Create the output file.
177          my $outFileName = "stats-" . lc($groupID) . ".inc";          my $outFileName = "stats-" . lc($groupID) . ".inc";
178          Open(\*GROUPFILE, ">$targetDir/$outFileName");          Open(\*GROUPFILE, ">$targetDir/$outFileName");
179          # Start the table.          # Get the serotypes.
180          print GROUPFILE "<table class=\"$tableStyle\">\n";          my %serotypes = map { $_->[0] => $_->[2] } $fig->get_attributes(\@newGenomes, "Serotype_code");
181          # Create the header row.          my $hasSeroData = (scalar(keys %serotypes) > 0);
182          print GROUPFILE Tr( { class => 'odd' }, th(["Strain annotated in NMPDR",          # If we have serotypes, we add an extra column.
183                                                   "Genome size, bp",          my @columnNames = "Strain annotated in NMPDR";
184            if ($hasSeroData) {
185                push @columnNames, "Serotype";
186            }
187            push @columnNames,  "Genome size, bp",
188                                                   "Protein Encoding Genes (PEGs)",                                                   "Protein Encoding Genes (PEGs)",
189                                                   "Named genes in subsystems",            # s0                                                   "Named genes in subsystems",            # s0
190                                                   "Named genes not in subsystems",        # n0                                                   "Named genes not in subsystems",        # n0
191                                                   "Hypothetical genes in subsystems",     # s1                                                   "Hypothetical genes in subsystems",     # s1
192                                                   "Hypothetical genes not in subsystems", # n1                                                   "Hypothetical genes not in subsystems", # n1
193                                                   "Subsystems",                                                   "Subsystems",
194                                                   "RNAs",                              "RNAs";
195                                                     ])) . "\n";          # Start the table.
196            print GROUPFILE "<table class=\"$tableStyle\">\n";
197            # Create the header row.
198            print GROUPFILE Tr( { class => 'odd' }, th(\@columnNames)) . "\n";
199          # The data rows will be built next. We'll be putting them into a hash keyed by          # The data rows will be built next. We'll be putting them into a hash keyed by
200          # organism name. The hash enables us to spit them out sorted by name.          # organism name. The hash enables us to spit them out sorted by name.
201          my %rows = ();          my %rows = ();
# Line 201  Line 210 
210              Trace("Processing ${new}genome $genomeID for $groupID.") if T(3);              Trace("Processing ${new}genome $genomeID for $groupID.") if T(3);
211              # Get the strain name.              # Get the strain name.
212              my $genomeName = $sprout->GenusSpecies($genomeID);              my $genomeName = $sprout->GenusSpecies($genomeID);
213                # Apply a link.
214                my $genomeText = CGI::a({ href => "../FIG/genome_statistics.cgi?genome=$genomeID;SPROUT=1" }, $genomeName);
215              # If this is a new strain, build the HTML for the NEW! mark.              # If this is a new strain, build the HTML for the NEW! mark.
216              if ($new) {              if ($new) {
217                  $new = " <span class=\"$markerStyle\">NEW!</span>";                  $new = " <span class=\"$markerStyle\">NEW!</span>";
# Line 263  Line 274 
274                                                 [$genomeID]);                                                 [$genomeID]);
275              my $ssLink = "$options->{linkCGI}?user=\&genome=$genomeID&SPROUT=1&show_subsystems=1";              my $ssLink = "$options->{linkCGI}?user=\&genome=$genomeID&SPROUT=1&show_subsystems=1";
276              my $ssCol = "<a href=\"$ssLink\">$ssCount</a>";              my $ssCol = "<a href=\"$ssLink\">$ssCount</a>";
277              # Create the row text. Note that we use the distributive capability of the TD              # Start creating the table cells.
278              # function to apply the same style to each one.              my $rowHtml = td("$genomeText$new");
279              my $rowHtml = join("",              # Check for a serotype.
280                                 td("$genomeName$new"),              if ($hasSeroData) {
281                    my $seroType = $serotypes{$genomeID} || "&nbsp;";
282                    $rowHtml .= td($seroType);
283                }
284                # Now add the data columns.
285                $rowHtml .= join("",
286                                 td({ class => $numStyle }, $genomeLen),                                 td({ class => $numStyle }, $genomeLen),
287                                 td({ class => $numStyle }, $pegCount),                                 td({ class => $numStyle }, $pegCount),
288                                 td({ class => $counterStyle }, \@counterValues),                                 td({ class => $counterStyle }, \@counterValues),

Legend:
Removed from v.1.25
Changed lines
Added in v.1.28

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3