[Bio] / Sprout / GenomeStats.pl Repository:
ViewVC logotype

Diff of /Sprout/GenomeStats.pl

Parent Directory | Revision Log | View Patch

revision 1.4, Sun Jun 18 07:12:58 2006 UTC | revision 1.21, Sun Aug 27 00:05:04 2006 UTC
# Line 61  Line 61 
61    
62  Style to use for small-text markers (e.g. NEW!)  Style to use for small-text markers (e.g. NEW!)
63    
64    =item numStyle
65    
66    Style to use for numeric cells.
67    
68    =item counterStyle
69    
70    Style to use for counter cells.
71    
72  =item linkCGI  =item linkCGI
73    
74  Path to the CGI script for displaying detailed statistics.  Path to the CGI script for displaying detailed statistics.
# Line 90  Line 98 
98                                              evenStyle => ['even', 'style for even rows'],                                              evenStyle => ['even', 'style for even rows'],
99                                              tableStyle => ['genomestats', 'style for whole table'],                                              tableStyle => ['genomestats', 'style for whole table'],
100                                              markerStyle => ['tinytext', 'style for markers'],                                              markerStyle => ['tinytext', 'style for markers'],
101                                                numStyle => ['numcell', 'style for cells with numeric values'],
102                                                counterStyle => ['countercell', 'style for cells with counter values'],
103                                              linkCGI => ['../FIG/genome_statistics.cgi',                                              linkCGI => ['../FIG/genome_statistics.cgi',
104                                                          'path to CGI script for detailed statistics'],                                                          'path to CGI script for detailed statistics'],
105                                             },                                             },
# Line 102  Line 112 
112  } elsif (! -d $targetDir) {  } elsif (! -d $targetDir) {
113      Confess("Target directory $targetDir not found.");      Confess("Target directory $targetDir not found.");
114  } else {  } else {
115      # *Get the old Sprout.      # Get the old Sprout.
116      my $oldSprout = SFXlate->new_sprout_only($FIG_Config::oldSproutDB);      my $oldSprout = SFXlate->new_sprout_only($FIG_Config::oldSproutDB);
117      # Extract the genome group data from the old Sprout.      # Extract the genome group data from the old Sprout.
118      my %oldGroupHash = $oldSprout->GetGroups();      my %oldGroupHash = $oldSprout->GetGroups();
# Line 127  Line 137 
137              %oldGenomes = map { $_ => 1 } @{$oldGroupHash{$groupID}};              %oldGenomes = map { $_ => 1 } @{$oldGroupHash{$groupID}};
138          }          }
139          # Create the output file.          # Create the output file.
140          Open(\*GROUPFILE, ">$targetDir/$groupID.inc");          my $outFileName = "stats-" . lc($groupID) . ".inc";
141            Open(\*GROUPFILE, ">$targetDir/$outFileName");
142          # Get the styles.          # Get the styles.
143          my ($tableStyle, $markerStyle, @rowStyle) = ($options->{tableStyle}, $options->{markerStyle},          my ($tableStyle, $markerStyle, @rowStyle) = ($options->{tableStyle}, $options->{markerStyle},
144                                                       $options->{evenStyle}, $options->{oddStyle});                                                       $options->{evenStyle}, $options->{oddStyle});
145            my ($numStyle, $counterStyle) = ($options->{numStyle}, $options->{counterStyle});
146          # Start the table.          # Start the table.
147          print GROUPFILE "<table class=\"$tableStyle\">\n";          print GROUPFILE "<table class=\"$tableStyle\">\n";
148          # Create the header row.          # Create the header row.
149          print GROUPFILE Tr( { class => 'odd' }, th("Strain annotated in NMPDR",          print GROUPFILE Tr( { class => 'odd' }, th(["Strain annotated in NMPDR",
150                                                   "Genome size, bp",                                                   "Genome size, bp",
151                                                   "Protein Encoding Genes (PEGs)",                                                   "Protein Encoding Genes (PEGs)",
152                                                   "Named genes in subsystems",            # s0                                                   "Named genes in subsystems",            # s0
153                                                   "Named genes not in subsystems",        # n0                                                   "Named genes not in subsystems",        # n0
154                                                   "Hypothetical genes in subsystems",     # s1                                                   "Hypothetical genes in subsystems",     # s1
155                                                   "Hypothetical genes not in subsystems", # n1                                                   "Hypothetical genes not in subsystems", # n1
156                                                   "RNAs")) . "\n";                                                   "Subsystems",
157                                                     "RNAs",
158                                                       ])) . "\n";
159          # Set up some useful stuff for the four count columns.          # Set up some useful stuff for the four count columns.
160          my %linkParms = ( s0 => "nohypo_sub", n0 => "nohypo_nosub",          my %linkParms = ( s0 => "nothypo_sub", n0 => "nothypo_nosub",
161                            s1 => "hypo_sub", n1 => "hypo_nosub" );                            s1 => "hypo_sub", n1 => "hypo_nosub" );
162          my @columnTypes = ('s0', 'n0', 's1', 'n1');          my @columnTypes = ('s0', 'n0', 's1', 'n1');
163          # The data rows will be built next. We'll be putting them into a hash keyed by          # The data rows will be built next. We'll be putting them into a hash keyed by
# Line 161  Line 175 
175                  $new = " <span class=\"$markerStyle\">NEW!</span>";                  $new = " <span class=\"$markerStyle\">NEW!</span>";
176              }              }
177              # Get the genome length.              # Get the genome length.
178              my $genomeLen = $sprout->GenomeLength($genomeID);              my $genomeLen = Tracer::CommaFormat($sprout->GenomeLength($genomeID));
179              # Get the number of PEGs.              # Get the number of PEGs.
180              my $pegCount = $sprout->FeatureCount($genomeID, 'peg');              my $pegCount = Tracer::CommaFormat($sprout->FeatureCount($genomeID, 'peg'));
181              # Get the number of RNAs.              # Get the number of RNAs.
182              my $rnaCount = $sprout->FeatureCount($genomeID, 'rna');              my $rnaCount = Tracer::CommaFormat($sprout->FeatureCount($genomeID, 'rna'));
183                # If there are no RNAs, we say we don't know the number, since we know there
184                # must be RNAs somewhere.
185                if (! $rnaCount) {
186                    $rnaCount = "n/d";
187                }
188              # Now we have four categories of features to work with, for each              # Now we have four categories of features to work with, for each
189              # combination of named or hypothetical vs. in-subsystem or              # combination of named or hypothetical vs. in-subsystem or
190              # not-in-subsystem. First, we get all of the feature assignments for              # not-in-subsystem. First, we get all of the feature assignments for
191              # the genome.              # the genome.
192              my $assignHash = $sprout->GenomeAssignments($genomeID);              my $assignHash = $sprout->GenomeAssignments($genomeID);
193              # Next, we get all of the features in the genome that belong to a              # Next, we get all of the features in the genome that belong to a
194              # subsystem. This involves a query via the subsystem spreadsheet.              # subsystem.
195              my %ssHash = map { $_ => 1 } $sprout->GetFlat(['IsGenomeOf', 'ContainsFeature'],              my %ssHash = $sprout->GenomeSubsystemData($genomeID);
                                                     "IsGenomeOf(from-link) = ?",  
                                                     [$genomeID], 'ContainsFeature(to-link)');  
196              # Create a hash to track the four categories. "s" or "n" indicates              # Create a hash to track the four categories. "s" or "n" indicates
197              # in or out of a subsystem. "1" or "0" indicates hypothetical or              # in or out of a subsystem. "1" or "0" indicates hypothetical or
198              # real.              # real.
# Line 190  Line 207 
207                  $counters{$ss} += 1;                  $counters{$ss} += 1;
208                  $totalFeatures++;                  $totalFeatures++;
209              }              }
210              Trace("$totalFeatures total feature found for $genomeID.") if T(3);              Trace("$totalFeatures total features found for $genomeID.") if T(3);
211              # We have all our data. Next we need to compute the percentages and the links.              # We have all our data. Next we need to compute the percentages and the links.
212              # First, the link stuff.              # First, the link stuff.
213              my $linkPrefix = "$options->{linkCGI}?user=\&genome=$genomeID&SPROUT=1&request=";              my $linkPrefix = "$options->{linkCGI}?user=\&genome=$genomeID&SPROUT=1&request=";
# Line 198  Line 215 
215              for my $type (keys %linkParms) {              for my $type (keys %linkParms) {
216                  $counters{$type} = a( { href => "$linkPrefix$linkParms{$type}" },                  $counters{$type} = a( { href => "$linkPrefix$linkParms{$type}" },
217                                       sprintf("%d(%.1f%%)", $counters{$type},                                       sprintf("%d(%.1f%%)", $counters{$type},
218                                               $counters{$type} * 100 / $totalFeatures));                                               Tracer::Percent($counters{$type}, $totalFeatures)));
219              }              }
220              # Create the row text.              my @counterValues = map { $counters{$_} } @columnTypes;
221              my $rowHtml = td( "$genomeName$new", $genomeLen, $pegCount,              # The last link is a button to look at the subsystem summaries.
222                                map { $counters{$_} } @columnTypes,              my $ssCount = $sprout->GetCount(['ParticipatesIn'], 'ParticipatesIn(from-link) = ?',
223                                $rnaCount );                                                 [$genomeID]);
224                my $ssLink = "$options->{linkCGI}?user=\&genome=$genomeID&SPROUT=1&show_subsystems=1";
225                my $ssCol = "<a href=\"$ssLink\">$ssCount</a>";
226                # Create the row text. Note that we use the distributive capability of the TD
227                # function to apply the same style to each one.
228                my $rowHtml = join("",
229                                   td("$genomeName$new"),
230                                   td({ class => $numStyle }, $genomeLen),
231                                   td({ class => $numStyle }, $pegCount),
232                                   td({ class => $counterStyle }, \@counterValues),
233                                   td({ class => $numStyle }, $ssCol),
234                                   td({ class => $numStyle }, $rnaCount),
235                                  );
236              # Put it in the row hash.              # Put it in the row hash.
237              $rows{$genomeName} = $rowHtml;              $rows{$genomeName} = $rowHtml;
238          }          }
# Line 222  Line 251 
251              # Count the row.              # Count the row.
252              $rowCount++;              $rowCount++;
253          }          }
254          # All done, close the file.          # All done, terminate the table and close the file.
255            print GROUPFILE "</table>\n";
256          close GROUPFILE;          close GROUPFILE;
257          Trace("$rowCount genomes processed.") if T(2);          Trace("$rowCount genomes processed.") if T(2);
258      }      }
# Line 265  Line 295 
295              $realGroupID = $1;              $realGroupID = $1;
296          }          }
297          # Append this group's genomes into the result hash.          # Append this group's genomes into the result hash.
298          Tracer::AddToListMap(\%retVal, $realGroupID, $groupHash{$groupID});          Tracer::AddToListMap(\%retVal, $realGroupID, @{$groupHash{$groupID}});
299      }      }
300      # Return the result hash.      # Return the result hash.
301      return %retVal;      return %retVal;

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.21

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3