--- GenomeStats.pl 2006/06/18 07:53:45 1.8
+++ GenomeStats.pl 2006/08/24 17:29:45 1.19
@@ -61,6 +61,14 @@
Style to use for small-text markers (e.g. NEW!)
+=item numStyle
+
+Style to use for numeric cells.
+
+=item counterStyle
+
+Style to use for counter cells.
+
=item linkCGI
Path to the CGI script for displaying detailed statistics.
@@ -90,6 +98,8 @@
evenStyle => ['even', 'style for even rows'],
tableStyle => ['genomestats', 'style for whole table'],
markerStyle => ['tinytext', 'style for markers'],
+ numStyle => ['numcell', 'style for cells with numeric values'],
+ counterStyle => ['countercell', 'style for cells with counter values'],
linkCGI => ['../FIG/genome_statistics.cgi',
'path to CGI script for detailed statistics'],
},
@@ -102,7 +112,7 @@
} elsif (! -d $targetDir) {
Confess("Target directory $targetDir not found.");
} else {
- # *Get the old Sprout.
+ # Get the old Sprout.
my $oldSprout = SFXlate->new_sprout_only($FIG_Config::oldSproutDB);
# Extract the genome group data from the old Sprout.
my %oldGroupHash = $oldSprout->GetGroups();
@@ -127,23 +137,27 @@
%oldGenomes = map { $_ => 1 } @{$oldGroupHash{$groupID}};
}
# Create the output file.
- Open(\*GROUPFILE, ">$targetDir/$groupID.inc");
+ my $outFileName = "stats-" . lc($groupID) . ".inc";
+ Open(\*GROUPFILE, ">$targetDir/$outFileName");
# Get the styles.
my ($tableStyle, $markerStyle, @rowStyle) = ($options->{tableStyle}, $options->{markerStyle},
$options->{evenStyle}, $options->{oddStyle});
+ my ($numStyle, $counterStyle) = ($options->{numStyle}, $options->{counterStyle});
# Start the table.
print GROUPFILE "
\n";
# Create the header row.
- print GROUPFILE Tr( { class => 'odd' }, th("Strain annotated in NMPDR",
+ print GROUPFILE Tr( { class => 'odd' }, th(["Strain annotated in NMPDR",
"Genome size, bp",
"Protein Encoding Genes (PEGs)",
"Named genes in subsystems", # s0
"Named genes not in subsystems", # n0
"Hypothetical genes in subsystems", # s1
"Hypothetical genes not in subsystems", # n1
- "RNAs")) . "\n";
+ "",
+ "RNAs",
+ ])) . "\n";
# Set up some useful stuff for the four count columns.
- my %linkParms = ( s0 => "nohypo_sub", n0 => "nohypo_nosub",
+ my %linkParms = ( s0 => "nothypo_sub", n0 => "nothypo_nosub",
s1 => "hypo_sub", n1 => "hypo_nosub" );
my @columnTypes = ('s0', 'n0', 's1', 'n1');
# The data rows will be built next. We'll be putting them into a hash keyed by
@@ -161,21 +175,24 @@
$new = " NEW!";
}
# Get the genome length.
- my $genomeLen = $sprout->GenomeLength($genomeID);
+ my $genomeLen = Tracer::CommaFormat($sprout->GenomeLength($genomeID));
# Get the number of PEGs.
- my $pegCount = $sprout->FeatureCount($genomeID, 'peg');
+ my $pegCount = Tracer::CommaFormat($sprout->FeatureCount($genomeID, 'peg'));
# Get the number of RNAs.
- my $rnaCount = $sprout->FeatureCount($genomeID, 'rna');
+ my $rnaCount = Tracer::CommaFormat($sprout->FeatureCount($genomeID, 'rna'));
+ # If there are no RNAs, we say we don't know the number, since we know there
+ # must be RNAs somewhere.
+ if (! $rnaCount) {
+ $rnaCount = "n/d";
+ }
# Now we have four categories of features to work with, for each
# combination of named or hypothetical vs. in-subsystem or
# not-in-subsystem. First, we get all of the feature assignments for
# the genome.
my $assignHash = $sprout->GenomeAssignments($genomeID);
# Next, we get all of the features in the genome that belong to a
- # subsystem. This involves a query via the subsystem spreadsheet.
- my %ssHash = map { $_ => 1 } $sprout->GetFlat(['IsGenomeOf', 'ContainsFeature'],
- "IsGenomeOf(from-link) = ?",
- [$genomeID], 'ContainsFeature(to-link)');
+ # subsystem.
+ my %ssHash = $sprout->GenomeSubsystemData($genomeID);
# Create a hash to track the four categories. "s" or "n" indicates
# in or out of a subsystem. "1" or "0" indicates hypothetical or
# real.
@@ -200,12 +217,20 @@
sprintf("%d(%.1f%%)", $counters{$type},
Tracer::Percent($counters{$type}, $totalFeatures)));
}
- # Create the row text. We use a list reference to distribute the TD tag
- # across all the cells.
- my $rowHtml = td(["$genomeName$new", $genomeLen, $pegCount,
- map { $counters{$_} } @columnTypes,
- $rnaCount,
- ]);
+ my @counterValues = map { $counters{$_} } @columnTypes;
+ # The last link is a button to look at the subsystem summaries.
+ my $ssLink = "$options->{linkCGI}?user=\&genome=$genomeID&SPROUT=1&show_subsystems=1";
+ my $ssCol = "
";
+ # Create the row text. Passing the counter values as a list reference makes the TD
+ # function distribute the counter style across every counter cell.
+ my $rowHtml = join("",
+ td("$genomeName$new"),
+ td({ class => $numStyle }, $genomeLen),
+ td({ class => $numStyle }, $pegCount),
+ td({ class => $counterStyle }, \@counterValues),
+ td($ssCol),
+ td({ class => $numStyle }, $rnaCount),
+ );
# Put it in the row hash.
$rows{$genomeName} = $rowHtml;
}