12 |
|
|
13 |
=over 4 |
=over 4 |
14 |
|
|
|
=item nofeats |
|
|
|
|
|
Do not process features. |
|
|
|
|
15 |
=item user |
=item user |
16 |
|
|
17 |
Name suffix to be used for log files. If omitted, the PID is used. |
Name suffix to be used for log files. If omitted, the PID is used. |
52 |
Output file name. The default is C<html/includes/diff.inc> in the |
Output file name. The default is C<html/includes/diff.inc> in the |
53 |
nmpdr C<next> directory. |
nmpdr C<next> directory. |
54 |
|
|
55 |
|
=item orgFile |
56 |
|
|
57 |
|
Output file for the genome report. The default is C<html/includes/genomes.inc> in |
58 |
|
the nmpdr C<next> directory. |
59 |
|
|
60 |
=back |
=back |
61 |
|
|
62 |
=head2 The Group File |
=head2 The Group File |
91 |
|
|
92 |
use strict; |
use strict; |
93 |
use Tracer; |
use Tracer; |
|
use DocUtils; |
|
|
use TestUtils; |
|
94 |
use Cwd; |
use Cwd; |
95 |
use File::Copy; |
use File::Copy; |
96 |
use File::Path; |
use File::Path; |
98 |
use SFXlate; |
use SFXlate; |
99 |
use Sprout; |
use Sprout; |
100 |
use CGI; |
use CGI; |
101 |
|
use FIGRules; |
102 |
|
|
103 |
# Get the command-line options and parameters. |
# Get the command-line options and parameters. |
104 |
my ($options, @parameters) = StandardSetup([qw(Sprout) ], |
my ($options, @parameters) = StandardSetup([qw(Sprout) ], |
105 |
{ |
{ |
106 |
groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"], |
groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"], |
|
nofeats => ["", "if specified, only genome changes will be displayed; otherwise, genome features will be compared and differences shown"], |
|
107 |
trace => ["2", "tracing level; use a minus to prevent tracing to standard output"], |
trace => ["2", "tracing level; use a minus to prevent tracing to standard output"], |
108 |
phone => ["", "phone number (international format) to call when load finishes"], |
phone => ["", "phone number (international format) to call when load finishes"], |
109 |
outFile => ["$FIG_Config::nmpdr_base/next/html/includes/diff.inc", "output file for the difference report"], |
outFile => ["$FIG_Config::nmpdr_base/next/html/includes/diff.inc", "output file for the difference report"], |
110 |
|
orgFile => ["$FIG_Config::nmpdr_base/next/html/includes/genomes.inc", "output file for the genome report"], |
111 |
}, |
}, |
112 |
"", |
"", |
113 |
@ARGV); |
@ARGV); |
128 |
# Get a nice-looking version name and make it into a title. |
# Get a nice-looking version name and make it into a title. |
129 |
my $version = uc $FIG_Config::nmpdr_version; |
my $version = uc $FIG_Config::nmpdr_version; |
130 |
$version =~ tr/_/ /; |
$version =~ tr/_/ /; |
131 |
push @html, $cgi->h4({align => "center"}, "Difference Report for $version."); |
push @html, $cgi->h4({align => "center"}, "Difference Report for $version"); |
132 |
# Start the table. |
# Start the table. |
133 |
push @html, $cgi->start_table({align => "center", border => "2"}); |
push @html, $cgi->start_table({align => "center", border => "2"}); |
134 |
# Get the group file. |
# Get the group file. |
135 |
my $groupFileName = $options->{groupFile}; |
my $groupFileName = $options->{groupFile}; |
136 |
Trace("Reading group file $groupFileName.") if T(2); |
Trace("Reading group file $groupFileName.") if T(2); |
|
# We'll put each group's data into a hash, keyed by group |
|
|
# name, each entry being a 3-tuple of page name, genus, |
|
|
# and species |
|
|
my %groups = Sprout::ReadGroupFile($groupFileName); |
|
137 |
Trace("Processing genomes.") if T(2); |
Trace("Processing genomes.") if T(2); |
138 |
# Get the current SEED. |
# Get the current SEED. |
139 |
my $fig = FIG->new(); |
my $fig = FIG->new(); |
140 |
# Get the old Sprout. |
# Get the old Sprout. |
141 |
my $oldSprout = SFXlate->new_sprout_only($FIG_Config::oldSproutDB); |
my $oldSprout = SFXlate->old_sprout_only(); |
142 |
# Get its genomes in alphabetical order. |
# Get its genomes in alphabetical order. |
143 |
my @oldGenomes = GetGenomes($oldSprout); |
my @oldGenomes = GetGenomes($oldSprout); |
144 |
# Get the new Sprout. |
# Get the new Sprout. |
147 |
my @newGenomes = GetGenomes($newSprout); |
my @newGenomes = GetGenomes($newSprout); |
148 |
# Compare the two genomes lists. |
# Compare the two genomes lists. |
149 |
my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes); |
my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes); |
150 |
# Add feature counts to the new genomes. |
# Get the super-group data. |
151 |
|
my %superTable = $newSprout->CheckGroupFile(); |
152 |
|
# Create a list for the new genomes that includes BBH and feature counts. We'll flip this |
153 |
|
# lists so that the genome names are first and the IDs second. |
154 |
|
my @insertedGenomeList = (); |
155 |
for my $insertedGenome (@{$insertedGenomes}) { |
for my $insertedGenome (@{$insertedGenomes}) { |
156 |
my $genomeID = $insertedGenome->[0]; |
my $genomeID = $insertedGenome->[0]; |
157 |
# For a new genome, display the feature count. |
# For a new genome, display the feature and BBH counts. |
158 |
my $count = $newSprout->GetCount(['HasFeature'], "HasFeature(from-link) = ?", |
my $count = $newSprout->GetCount(['HasFeature'], "HasFeature(from-link) = ?", |
159 |
[$genomeID]); |
[$genomeID]); |
160 |
my $suffix = ($count == 1 ? " one feature" : "$count features"); |
my $suffix = ($count == 1 ? " one feature" : "$count features"); |
161 |
$insertedGenome->[1] .= "($suffix)"; |
my $bbhCount = FIGRules::BatchBBHs("fig|$genomeID.%", 1e-10); |
162 |
} |
$suffix .= "; " . ($bbhCount == 1 ? "one BBH" : "$bbhCount BBHs"); |
163 |
# Add information about SEED status to the deleted genomes. |
push @insertedGenomeList, [$insertedGenome->[1], "$genomeID ($suffix)"]; |
164 |
|
} |
165 |
|
# Create a list for the deleted genomes that contains information about SEED status. |
166 |
|
# This list is flipped, too. |
167 |
|
my @deletedGenomeList = (); |
168 |
for my $deletedGenome (@{$deletedGenomes}) { |
for my $deletedGenome (@{$deletedGenomes}) { |
169 |
my $genomeID = $deletedGenome->[0]; |
my $genomeID = $deletedGenome->[0]; |
170 |
|
my $suffix = ""; |
171 |
if ($fig->is_genome($genomeID)) { |
if ($fig->is_genome($genomeID)) { |
172 |
|
# Here the deleted genome is still in the SEED. |
173 |
my $complete = ($fig->is_complete($genomeID) ? "complete" : "incomplete"); |
my $complete = ($fig->is_complete($genomeID) ? "complete" : "incomplete"); |
174 |
$deletedGenome->[1] .= "(still in SEED, $complete)"; |
$suffix = " (still in SEED, $complete)"; |
175 |
|
} else { |
176 |
|
# It's not in the SEED. See if it has been replaced. |
177 |
|
my ($genus, $species, $strain) = $oldSprout->GetGenomeNameData($genomeID); |
178 |
|
my @genomeIDs = $newSprout->GetGenomeByNameData($genus, $species, $strain); |
179 |
|
if (scalar @genomeIDs) { |
180 |
|
$suffix = " (replaced)"; |
181 |
|
} |
182 |
} |
} |
183 |
|
push @deletedGenomeList, [$deletedGenome->[1], "$genomeID$suffix"]; |
184 |
} |
} |
185 |
# Display the lists. |
# Display the lists. |
186 |
push @html, ShowLists($cgi, 'New Genomes' => $insertedGenomes, |
push @html, ShowLists($cgi, 'New Genomes' => \@insertedGenomeList, |
187 |
'Deleted Genomes' => $deletedGenomes); |
'Deleted Genomes' => \@deletedGenomeList); |
188 |
# Now the groups. |
# Now the groups. |
189 |
Trace("Comparing groups.") if T(2); |
Trace("Comparing groups.") if T(2); |
190 |
my %oldGroups = $oldSprout->GetGroups(); |
my %oldGroups = $oldSprout->GetGroups(); |
198 |
my @groupGenomes = NameGenomes($newSprout, $newGroups{$newGroup}); |
my @groupGenomes = NameGenomes($newSprout, $newGroups{$newGroup}); |
199 |
push @html, ShowLists($cgi, "Genomes in new group $newGroup" => \@groupGenomes); |
push @html, ShowLists($cgi, "Genomes in new group $newGroup" => \@groupGenomes); |
200 |
} else { |
} else { |
201 |
# Here the group is in both versions. Fix the lists and compare them. |
# Here the group is in both versions. Fix the lists and compare them. Note that we'll be comparing |
202 |
my @newGroupList = NameGenomes($newSprout, $newGroups{$newGroup}); |
# on the genome ID, which will become the second list element after the call to NameGenomes. |
203 |
my @oldGroupList = NameGenomes($oldSprout, $oldGroups{$newGroup}); |
my @newGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($newSprout, $newGroups{$newGroup}); |
204 |
|
my @oldGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($oldSprout, $oldGroups{$newGroup}); |
205 |
Trace("Comparing lists for $newGroup.") if T(4); |
Trace("Comparing lists for $newGroup.") if T(4); |
206 |
my ($insertedGroupGenomes, $deletedGroupGenomes) = Tracer::CompareLists(\@newGroupList, \@oldGroupList); |
my ($insertedGroupGenomes, $deletedGroupGenomes) = Tracer::CompareLists(\@newGroupList, \@oldGroupList, 1); |
207 |
Trace("Comparison complete.") if T(4); |
Trace("Comparison complete.") if T(4); |
208 |
# Delete the old group data. When we're done, this means the hash |
# Delete the old group data. When we're done, this means the hash |
209 |
# will contain only the deleted groups. |
# will contain only the deleted groups. |
240 |
} |
} |
241 |
push @html, ShowLists($cgi, 'New Subsystems' => $insertedSubs, |
push @html, ShowLists($cgi, 'New Subsystems' => $insertedSubs, |
242 |
'Deleted Subsystems' => $deletedSubs); |
'Deleted Subsystems' => $deletedSubs); |
243 |
|
# Print what we've done so far. |
244 |
|
FlushData(\*OUTPUT, \@html); |
245 |
|
# Now we need to process some statistics that require looping through all the |
246 |
|
# features in the new sprout. While we're at it, we'll collect the BBH and |
247 |
|
# coupling counts. |
248 |
|
my $bbhCount = 0; |
249 |
|
my $couplingCount = 0; |
250 |
|
# We'll accumulate a report of genomes with missing BBHs in here. |
251 |
|
my @bbhMissingGenomes = (); |
252 |
|
# One of the reports is only for genomes common to both sprouts. To help us |
253 |
|
# make this determination, we get a hash of the inserted genomes. |
254 |
|
my %skipGenomes = map { $_->[0] => 1 } @{$insertedGenomes}; |
255 |
|
# Open the organism report file. |
256 |
|
Open(\*ORGOUT, ">$options->{orgFile}"); |
257 |
|
# Start the table. |
258 |
|
my @orgHtml = (); |
259 |
|
push @orgHtml, $cgi->h4({ align => 'center' }, "Genome Report for $version"); |
260 |
|
push @orgHtml, $cgi->start_table({ border => 2, align => 'center'}); |
261 |
|
push @orgHtml, $cgi->Tr($cgi->th("Genome"), |
262 |
|
$cgi->th({align => 'right'}, ["Size (bp)", "Feats", "Contigs", "Subs", |
263 |
|
"F In SS", "PEGs", "RNAs", "PPs", "New", "Del"])); |
264 |
|
FlushData(\*ORGOUT, \@orgHtml); |
265 |
|
# Now we start the loop. Note that "newGenomes" means all the genomes in the new Sprout, |
266 |
|
# not the list of genomes that are new! |
267 |
|
for my $genomeData (@newGenomes) { |
268 |
|
# Get this genome's ID and name. |
269 |
|
my $genomeID = $genomeData->[0]; |
270 |
|
# Create a title for it. |
271 |
|
my $genomeTitle = "$genomeID: $genomeData->[1]"; |
272 |
|
# Compute its size. |
273 |
|
my $genomeSize = $newSprout->GenomeLength($genomeID); |
274 |
|
# Get a list of the genomes in the new Sprout that are not this one. |
275 |
|
my @otherGenomes = grep { $_ ne $genomeID } map { $_->[0] } @newGenomes; |
276 |
|
Trace("Computing BBH matrix for $genomeID.") if T(3); |
277 |
|
# Get the bbh matrix going from the current genome to the others. |
278 |
|
my %matrix = $newSprout->BBHMatrix($genomeID, 1e-20, @otherGenomes); |
279 |
|
# Set up the subsystem hash. This will be used to track which subsystems are used by |
280 |
|
# the genome's features. |
281 |
|
my %subHash = (); |
282 |
|
# Set up the contig hash. This will be used to track which contigs are used by the |
283 |
|
# genome's features. |
284 |
|
my %contigHash = (); |
285 |
|
# Set up a counter hash for feature types. |
286 |
|
my %counters = (peg => 0, in_ss => 0, rna => 0, pp => 0, total => 0); |
287 |
|
# We'll store information about the genome's features (ID and functional role) in here. |
288 |
|
my @newFeatures = (); |
289 |
|
# Finally, we'll use this flag to warn us if there are no BBHs. |
290 |
|
my $bbhsFound = 0; |
291 |
|
# Loop through the genome's features. The order is important here, because we need |
292 |
|
# to match the order used by "GetFeatures" for the feature difference comparison. |
293 |
|
Trace("Processing feature statistics for $genomeID.") if T(3); |
294 |
|
my $fquery = $newSprout->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)", |
295 |
|
[$genomeID]); |
296 |
|
# Loop through the features, updating the counts. |
297 |
|
while (my $feature = $fquery->Fetch()) { |
298 |
|
# Update the total feature count. |
299 |
|
$counters{total}++; |
300 |
|
Trace("$counters{total} features processed for $genomeID.") if T(3) && ($counters{total} % 500 == 0); |
301 |
|
# Get the feature ID and role. |
302 |
|
my $fid = $feature->PrimaryValue('Feature(id)'); |
303 |
|
push @newFeatures, [$fid, $feature->PrimaryValue('Feature(assignment)')]; |
304 |
|
# Check to see if we have BBH data. |
305 |
|
if (exists $matrix{$fid}) { |
306 |
|
my $fidBbhCount = scalar keys %{$matrix{$fid}}; |
307 |
|
if ($fidBbhCount > 0) { |
308 |
|
# Denote that this feature has BBHs. |
309 |
|
$bbhsFound = 1; |
310 |
|
# Add them to the total BBH count. |
311 |
|
$bbhCount += $fidBbhCount; |
312 |
|
} |
313 |
|
} |
314 |
|
# Ask for couplings. |
315 |
|
my %coupleHash = $newSprout->CoupledFeatures($fid); |
316 |
|
$couplingCount += keys %coupleHash; |
317 |
|
# See if this feature is in a subsystem. |
318 |
|
my %subs = $newSprout->SubsystemsOf($fid); |
319 |
|
if (keys %subs) { |
320 |
|
$counters{in_ss}++; |
321 |
|
for my $sub (keys %subs) { |
322 |
|
$subHash{$sub} = 1; |
323 |
|
} |
324 |
|
} |
325 |
|
# Increment the feature type counter. |
326 |
|
$counters{$feature->PrimaryValue('Feature(feature-type)')}++; |
327 |
|
# Insure we've tracked this feature's contigs. |
328 |
|
my @locations = split /\s*,\s*/, $feature->PrimaryValue('Feature(location-string)'); |
329 |
|
for my $loc (@locations) { |
330 |
|
my $locObject = BasicLocation->new($loc); |
331 |
|
$contigHash{$locObject->Contig} = 1; |
332 |
|
} |
333 |
|
} |
334 |
|
Trace("Feature data compiled for $genomeID.") if T(3); |
335 |
|
# The last thing we need to do is compute the number of features added or deleted. |
336 |
|
# This goes in the genome report, but it's only meaningful for common genomes. |
337 |
|
my ($addCount, $delCount) = ("",""); |
338 |
|
if (! $skipGenomes{$genomeID}) { |
339 |
|
# Get the old features. |
340 |
|
my @oldFeatures = GetFeatures($oldSprout, $genomeID); |
341 |
|
Trace("Comparing features for $genomeID.") if T(3); |
342 |
|
# Compare the lists. |
343 |
|
my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures); |
344 |
|
$addCount = scalar(@{$insertedFeatures}); |
345 |
|
$delCount = scalar(@{$deletedFeatures}); |
346 |
|
} |
347 |
|
# Check to see if this genome is missing its BBHs. |
348 |
|
if (! $bbhsFound) { |
349 |
|
# It is, so add a line for it to the missing-BBH list. |
350 |
|
push @bbhMissingGenomes, ShowDatum($cgi, $genomeData->[1], $genomeID); |
351 |
|
} |
352 |
|
push @orgHtml, $cgi->Tr($cgi->td($genomeTitle), |
353 |
|
$cgi->td({align => 'right'}, [$genomeSize, $counters{total}, scalar(keys %contigHash), |
354 |
|
scalar(keys %subHash), $counters{in_ss}, $counters{peg}, |
355 |
|
$counters{rna}, $counters{pp}, $addCount, $delCount])); |
356 |
|
FlushData(\*ORGOUT, \@orgHtml); |
357 |
|
} |
358 |
|
# Close the table for the genome report. |
359 |
|
push @orgHtml, $cgi->end_table(); |
360 |
|
FlushData(\*ORGOUT, \@orgHtml); |
361 |
|
close ORGOUT; |
362 |
|
# Check for a missing-BBH report. |
363 |
|
if (scalar @bbhMissingGenomes) { |
364 |
|
# There is a report, so put it into the output stream. |
365 |
|
push @html, ShowTitle($cgi, "Genomes without BBHs"); |
366 |
|
push @html, @bbhMissingGenomes; |
367 |
|
} |
368 |
|
# Flush the genome feature comparison data and the missing-BBH report (if any). |
369 |
|
FlushData(\*OUTPUT, \@html); |
370 |
# Next, we show some basic counts. |
# Next, we show some basic counts. |
371 |
|
Trace("Displaying counts.") if T(3); |
372 |
push @html, ShowTitle($cgi, "Statistics for old Sprout"); |
push @html, ShowTitle($cgi, "Statistics for old Sprout"); |
373 |
push @html, ShowCounts($cgi, $oldSprout); |
push @html, ShowCounts($cgi, $oldSprout); |
374 |
push @html, ShowTitle($cgi, "Statistics for new Sprout"); |
push @html, ShowTitle($cgi, "Statistics for new Sprout"); |
375 |
push @html, ShowCounts($cgi, $newSprout); |
push @html, ShowCounts($cgi, $newSprout); |
376 |
|
push @html, ShowDatum($cgi, BBHs => $bbhCount); |
377 |
|
push @html, ShowDatum($cgi, "Functional Couplings", $couplingCount); |
378 |
|
FlushData(\*OUTPUT, \@html); |
379 |
# Now we show the genomes that are not in groups but could be. First, we convert |
# Now we show the genomes that are not in groups but could be. First, we convert |
380 |
# our group hash from the new Sprout into the form used on the web site. |
# our group hash from the new Sprout into the form used on the web site. |
381 |
Trace("Examining possible missing genomes in groups.") if T(2); |
Trace("Examining possible missing genomes in groups.") if T(2); |
382 |
my %fixedGroups = Sprout::Fix(%newGroups); |
my %fixedGroups = $newSprout->Fix(%newGroups); |
383 |
for my $group (sort keys %groups) { |
for my $group (sort keys %superTable) { |
384 |
Trace("Checking group $group."); |
Trace("Checking group $group."); |
385 |
# Get this group's genus and species. |
# Loop through this group's genus/species pairs creating filters |
386 |
my $genus = $groups{$group}->[1]; |
# for a genome query. |
387 |
my $species = $groups{$group}->[2]; |
my @filters = (); |
388 |
# Get a hash of the genomes already in it. |
my @filterParms = (); |
389 |
my %inGroup = map { $_ => 1 } @{$fixedGroups{$group}}; |
for my $genusSpecies (@{$superTable{$group}->{content}}) { |
390 |
# Get a list of its possible genomes. |
my ($genus, $species) = @{$genusSpecies}; |
391 |
|
# Filter on genus. |
392 |
my $filter = 'Genome(genus) = ?'; |
my $filter = 'Genome(genus) = ?'; |
393 |
my @parms = ($genus); |
push @filterParms, $genus; |
394 |
# The species list is tricky because a given group may involve more than |
# If necessary, filter on species. |
|
# one target species. The species names will be comma-separated, and |
|
|
# we use some PERL trickiness to generate an OR filter for them. |
|
395 |
if ($species) { |
if ($species) { |
396 |
# Get the individual species. |
$filter .= ' AND Genome(species) = ?'; |
397 |
my @speciesList = split /\s*,\s*/, $species; |
push @filterParms, $species; |
398 |
# Create one species filter per species. |
} |
399 |
my @filterClauses = map { 'Genome(species) = ?' } @speciesList; |
# Add this filter to the list. |
400 |
# OR the filter clauses together to get a real filter. |
push @filters, "($filter)"; |
|
$filter .= " AND (" . (join " OR ", @filterClauses) . ")"; |
|
|
# Add the specieis names to the SQL parameter list. |
|
|
push @parms, @speciesList; |
|
401 |
} |
} |
402 |
my @possibles = $newSprout->GetFlat(['Genome'], $filter, \@parms, 'Genome(id)'); |
# Get all the genomes that should be in the super-group. |
403 |
|
my @possibles = $newSprout->GetFlat(['Genome'], join(" OR ", @filters), |
404 |
|
\@filterParms, 'Genome(id)'); |
405 |
|
# Get a hash of the genomes already in it. |
406 |
|
my %inGroup = map { $_ => 1 } @{$fixedGroups{$group}}; |
407 |
# Get the possibles that aren't in the group and add identifying information. |
# Get the possibles that aren't in the group and add identifying information. |
408 |
my @leftOut = NameGenomes($newSprout, [ grep { ! exists $inGroup{$_} } @possibles ]); |
my @leftOut = NameGenomes($newSprout, [ grep { ! exists $inGroup{$_} } @possibles ]); |
409 |
# If anything survived, show the list. |
# If anything survived, show the list. |
411 |
push @html, ShowLists($cgi, "Candidates for $group" => \@leftOut); |
push @html, ShowLists($cgi, "Candidates for $group" => \@leftOut); |
412 |
} |
} |
413 |
} |
} |
414 |
# Compare the property tables. |
FlushData(\*OUTPUT, \@html); |
|
Trace("Comparing properties.") if T(2); |
|
|
# Set up lists of all the properties in each sprout. |
|
|
my @oldProps = GetProperties($oldSprout); |
|
|
my @newProps = GetProperties($newSprout); |
|
|
# Compare the lists. |
|
|
my ($insertedProps, $deletedProps) = Tracer::CompareLists(\@oldProps, \@newProps); |
|
|
# Now get all the properties in the new Sprout without any features. |
|
|
my @emptyProps = grep { $_->[1] == 0 } @newProps; |
|
|
# Show what we've found. |
|
|
push @html, ShowLists($cgi, 'New Properties' => $insertedProps, |
|
|
'Deleted Properties' => $deletedProps, |
|
|
'Empty Properties' => \@emptyProps); |
|
|
# Now we process the features of the common genes. |
|
|
if (! $options->{nofeats}) { |
|
|
# First we need a hash of the inserted stuff so we know to skip it. |
|
|
my %skipGenomes = map { $_->[0] => 1 } @{$insertedGenomes}; |
|
|
# Loop through the genomees. |
|
|
for my $genome (@newGenomes) { |
|
|
# Get the ID and name. |
|
|
my ($genomeID, $genomeName) = @{$genome}; |
|
|
Trace("Processing $genomeID.") if T(3); |
|
|
# Only process the common genes. |
|
|
if (! $skipGenomes{$genomeID}) { |
|
|
# Compare the genome group information. |
|
|
# Get the new and old features. This will be very stressful to the machine, |
|
|
# because there are lots of features. |
|
|
my @oldFeatures = GetFeatures($oldSprout, $genomeID); |
|
|
my @newFeatures = GetFeatures($newSprout, $genomeID); |
|
|
Trace("Comparing features for $genomeID.") if T(3); |
|
|
# Compare the lists. |
|
|
my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures); |
|
|
# Display the lists. Only nonempty lists are displayed; however, we do a check |
|
|
# first anyway so the trace tells us what's happening. |
|
|
if (scalar @{$insertedFeatures} + scalar @{$deletedFeatures} > 0) { |
|
|
Trace("Displaying feature differences.") if T(3); |
|
|
push @html, ShowLists($cgi, "New Features for $genomeID" => $insertedFeatures, |
|
|
"Features Deleted from $genomeID" => $deletedFeatures); |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
415 |
# Close the table. |
# Close the table. |
416 |
push @html, $cgi->end_table(); |
push @html, $cgi->end_table(); |
417 |
# Assemble and write the HTML. |
# Flush the last of the HTML. |
418 |
Trace("Writing to output.") if T(2); |
FlushData(\*OUTPUT, \@html); |
|
print OUTPUT join("\n", @html, ""); |
|
419 |
# Close the output file. |
# Close the output file. |
420 |
close OUTPUT; |
close OUTPUT; |
421 |
Trace("Analysis complete.") if T(2); |
Trace("Analysis complete.") if T(2); |
436 |
} |
} |
437 |
} |
} |
438 |
|
|
439 |
|
=head3 FlushData |
440 |
|
|
441 |
|
FlushData($handle, \@lines); |
442 |
|
|
443 |
|
Write the specified lines to the output file and clear them out of the list. This |
444 |
|
method is called periodically so that even if something goes wrong we can still |
445 |
|
see the data accumulating in the output file. The key aspect here is that we |
446 |
|
put new-line characters after each line written and show something in the trace |
447 |
|
log. |
448 |
|
|
449 |
|
=over 4 |
450 |
|
|
451 |
|
=item handle |
452 |
|
|
453 |
|
Output handle to which the lines should be written. |
454 |
|
|
455 |
|
=item lines |
456 |
|
|
457 |
|
Reference to a list of output lines. The output lines will be written to the output |
458 |
|
handle and then removed from the list. |
459 |
|
|
460 |
|
=back |
461 |
|
|
462 |
|
=cut |
463 |
|
|
464 |
|
sub FlushData { |
465 |
|
# Get the parameters. |
466 |
|
my ($handle, $lines) = @_; |
467 |
|
Trace("Flushing " . scalar(@{$lines}) . " lines to output file.") if T(3); |
468 |
|
# Write the lines. |
469 |
|
print $handle join("\n", @{$lines}); |
470 |
|
# Write a terminating new-line. |
471 |
|
print $handle "\n"; |
472 |
|
# Clear the list. |
473 |
|
splice @{$lines}; |
474 |
|
} |
475 |
|
|
476 |
=head3 GetGenomes |
=head3 GetGenomes |
477 |
|
|
478 |
C<< my @geneList = GetGenomes($sprout); >> |
my @geneList = GetGenomes($sprout); |
479 |
|
|
480 |
Return a list of the genomes in the specified Sprout instance. The genomes |
Return a list of the genomes in the specified Sprout instance. The genomes |
481 |
are returned in alphabetical order by genome ID. |
are returned in alphabetical order by genome ID. |
511 |
|
|
512 |
=head3 NameGenomes |
=head3 NameGenomes |
513 |
|
|
514 |
C<< my @newList = NameGenomes($sprout, \@genomes); >> |
my @newList = NameGenomes($sprout, \@genomes); |
515 |
|
|
516 |
Convert a list of genome IDs to a list of genome IDs with names. |
Convert a list of genome IDs to a list of genome IDs with names. |
517 |
|
|
545 |
|
|
546 |
=head3 GetSubsystems |
=head3 GetSubsystems |
547 |
|
|
548 |
C<< my @subsystems = GetSubsystems($sprout); >> |
my @subsystems = GetSubsystems($sprout); |
549 |
|
|
550 |
Get a list of the subsystems in the specified Sprout instance. |
Get a list of the subsystems in the specified Sprout instance. |
551 |
|
|
576 |
|
|
577 |
=head3 GetProperties |
=head3 GetProperties |
578 |
|
|
579 |
C<< my @propertyList = GetProperties($sprout); >> |
my @propertyList = GetProperties($sprout); |
580 |
|
|
581 |
Return a list of properties. Each element in the list will be a 2-tuple containing |
Return a list of properties. Each element in the list will be a 2-tuple containing |
582 |
the property name and value in the first column and its ID in the second column. |
the property name and value in the first column and its ID in the second column. |
623 |
|
|
624 |
=head3 GetFeatures |
=head3 GetFeatures |
625 |
|
|
626 |
C<< my @features = GetFeatures($sprout, $genomeID); >> |
my @features = GetFeatures($sprout, $genomeID); |
627 |
|
|
628 |
Return the features of the specified genome in the specified Sprout instance. |
Return the features of the specified genome in the specified Sprout instance. |
629 |
|
|
659 |
|
|
660 |
=head3 ShowLists |
=head3 ShowLists |
661 |
|
|
662 |
C<< my @htmlLines = ShowLists($cgi, %lists); >> |
my @htmlLines = ShowLists($cgi, %lists); |
663 |
|
|
664 |
Display a set of lists. Each list should consist of 2-tuples, and the list |
Display a set of lists. Each list should consist of 2-tuples, and the list |
665 |
entries will be displayed as 2-element table rows with a header row. |
entries will be displayed as 2-element table rows with a header row. |
707 |
for my $entry (@{$list}) { |
for my $entry (@{$list}) { |
708 |
my ($name, $data) = @{$entry}; |
my ($name, $data) = @{$entry}; |
709 |
$name =~ tr/_/ /; |
$name =~ tr/_/ /; |
710 |
push @retVal, $cgi->Tr($cgi->td($name), $cgi->td({align => "right"}, $data)); |
push @retVal, ShowDatum($cgi, $name => $data); |
711 |
} |
} |
712 |
} |
} |
713 |
} |
} |
717 |
|
|
718 |
=head3 ComputeHeader |
=head3 ComputeHeader |
719 |
|
|
720 |
C<< my $header = ComputeHeader($name, $count); >> |
my $header = ComputeHeader($name, $count); |
721 |
|
|
722 |
Return a list header for a list of the specified length. |
Return a list header for a list of the specified length. |
723 |
|
|
757 |
|
|
758 |
=head3 ShowCounts |
=head3 ShowCounts |
759 |
|
|
760 |
C<< ShowCounts($sprout); >> |
ShowCounts($sprout); |
761 |
|
|
762 |
Display general counts for the specified sprout instance. These counts are |
Display general counts for the specified sprout instance. These counts are |
763 |
used in progress reports. |
used in progress reports. |
786 |
# Count genomes and subsystems. |
# Count genomes and subsystems. |
787 |
my $genomes = $sprout->GetCount(['Genome']); |
my $genomes = $sprout->GetCount(['Genome']); |
788 |
my $subsystems = $sprout->GetCount(['Subsystem']); |
my $subsystems = $sprout->GetCount(['Subsystem']); |
789 |
# Count roles, external functional assignments, and functional couplings. |
# Count roles and external functional assignments. |
790 |
my $roles = $sprout->GetCount(['OccursInSubsystem']); |
my $roles = $sprout->GetCount(['OccursInSubsystem']); |
791 |
my $funcs = $sprout->GetCount(['ExternalAliasFunc']); |
my $funcs = $sprout->GetCount(['ExternalAliasFunc']); |
|
my $couples = $sprout->GetCount(['Coupling']); |
|
792 |
# Count features. |
# Count features. |
793 |
my $features = $sprout->GetCount(['Feature']); |
my $features = $sprout->GetCount(['Feature']); |
794 |
# Display the counts. |
# Display the counts. |
795 |
my @retVal = (); |
my @retVal = (); |
796 |
push @retVal, $cgi->Tr($cgi->td("Genomes"), $cgi->td({ align => "right" }, $genomes)); |
push @retVal, ShowDatum($cgi, Genomes => $genomes); |
797 |
push @retVal, $cgi->Tr($cgi->td("Subsystems"), $cgi->td({ align => "right" }, $subsystems)); |
push @retVal, ShowDatum($cgi, Subsystems => $subsystems); |
798 |
push @retVal, $cgi->Tr($cgi->td("Roles"), $cgi->td({ align => "right" }, $roles)); |
push @retVal, ShowDatum($cgi, Roles => $roles); |
799 |
push @retVal, $cgi->Tr($cgi->td("External function assignments"), $cgi->td({ align => "right" }, $funcs)); |
push @retVal, ShowDatum($cgi, 'External function assignments', $funcs); |
800 |
push @retVal, $cgi->Tr($cgi->td("Features"), $cgi->td({ align => "right" }, $features)); |
push @retVal, ShowDatum($cgi, Features => $features); |
|
push @retVal, $cgi->Tr($cgi->td("Functional Coupling"), $cgi->td({ align => "right" }, $couples)); |
|
801 |
# Return the html. |
# Return the html. |
802 |
return @retVal; |
return @retVal; |
803 |
} |
} |
804 |
|
|
805 |
|
=head3 ShowDatum |
806 |
|
|
807 |
|
my $htmlText = ShowDatum($cgi, $label, $value); |
808 |
|
|
809 |
|
Return a table row displaying the specified label and value. |
810 |
|
|
811 |
|
=over 4 |
812 |
|
|
813 |
|
=item cgi |
814 |
|
|
815 |
|
CGI query object used to generate the HTML text. |
816 |
|
|
817 |
|
=item label |
818 |
|
|
819 |
|
Label to appear in the left cell of the table row. |
820 |
|
|
821 |
|
=item value |
822 |
|
|
823 |
|
Value to appear in the right cell of the table row. |
824 |
|
|
825 |
|
=item RETURN |
826 |
|
|
827 |
|
Returns the HTML for a single table row with the last cell right-aligned. |
828 |
|
|
829 |
|
=back |
830 |
|
|
831 |
|
=cut |
832 |
|
|
833 |
|
sub ShowDatum { |
834 |
|
# Get the parameters. |
835 |
|
my ($cgi, $label, $value) = @_; |
836 |
|
# Create the table row. |
837 |
|
my $retVal = $cgi->Tr($cgi->td($label), $cgi->td({align => 'right'}, $value)); |
838 |
|
# Return it. |
839 |
|
return $retVal; |
840 |
|
} |
841 |
|
|
842 |
=head3 ShowTitle |
=head3 ShowTitle |
843 |
|
|
844 |
C<< my $html = ShowTitle($cgi, $title); >> |
my $html = ShowTitle($cgi, $title); |
845 |
|
|
846 |
Display a title line. This will be a merged table row with bolded text. |
Display a title line. This will be a merged table row with bolded text. |
847 |
|
|