[Bio] / Sprout / NewStuffCheck.pl Repository:
ViewVC logotype

Annotation of /Sprout/NewStuffCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.25 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 New Stuff Checker
4 :    
5 :     This script compares the genomes, features, and annotations in
6 : parrello 1.16 the old and new sprouts and lists the differences and produces
7 :     a report in HTML. The output is an HTML fragment, not an entire
8 :     web page. This is because we expect it to be included in another
9 :     page.
10 : parrello 1.1
11 : parrello 1.11 The currently-supported command-line options for NewStuffCheck are as follows.
12 : parrello 1.1
13 :     =over 4
14 :    
15 :     =item user
16 :    
17 :     Name suffix to be used for log files. If omitted, the PID is used.
18 :    
19 :     =item trace
20 :    
21 :     Numeric trace level. A higher trace level causes more messages to appear. The
22 :     default trace level is 2. Tracing will be directly to the standard output
23 :     as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
24 :     where I<User> is the value of the B<user> option above.
25 :    
26 :     =item sql
27 :    
28 :     If specified, turns on tracing of SQL activity.
29 :    
30 :     =item background
31 :    
32 :     Save the standard and error output to files. The files will be created
33 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
34 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
35 :     B<user> option above.
36 :    
37 :     =item h
38 :    
39 :     Display this command's parameters and options.
40 :    
41 :     =item phone
42 :    
43 :     Phone number to message when the script is complete.
44 :    
45 : parrello 1.11 =item groupFile
46 :    
47 :     Name of the group file (described below). The default is C<groups.tbl>
48 :     in the Sprout data directory.
49 :    
50 : parrello 1.16 =item outFile
51 :    
52 : parrello 1.19 Output file name. The default is C<html/includes/diff.inc> in the
53 : parrello 1.16 nmpdr C<next> directory.
54 :    
55 : parrello 1.23 =item orgFile
56 :    
57 :     Output file for the genome report. The default is C<html/includes/genomes.inc> in
58 :     the nmpdr C<next> directory.
59 :    
60 : parrello 1.11 =back
61 :    
62 :     =head2 The Group File
63 :    
64 :     A key data file for this process is C<groups.tbl>. This file is kept in the
65 :     Sprout Data directory, and contains the following columns:
66 :    
67 :     =over 4
68 :    
69 :     =item name
70 :    
71 :     Name of the group.
72 :    
73 :     =item page
74 :    
75 :     Name of the group's page on the web site (e.g. C<campy.php> for
76 :     Campylobacter)
77 :    
78 :     =item genus
79 :    
80 :     Genus of the group
81 :    
82 :     =item species
83 :    
84 :     Species of the group, or an empty string if the group is for an entire
85 : parrello 1.13 genus. If the group contains more than one species, the species names
86 :     should be separated by commas.
87 : parrello 1.11
88 : parrello 1.1 =back
89 :    
90 :     =cut
91 :    
92 :     use strict;
93 :     use Tracer;
94 :     use DocUtils;
95 :     use TestUtils;
96 :     use Cwd;
97 :     use File::Copy;
98 :     use File::Path;
99 :     use FIG;
100 :     use SFXlate;
101 :     use Sprout;
102 : parrello 1.16 use CGI;
103 : parrello 1.20 use FIGRules;
104 : parrello 1.1
105 :     # Get the command-line options and parameters.
106 :     my ($options, @parameters) = StandardSetup([qw(Sprout) ],
107 :     {
108 : parrello 1.11 groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"],
109 : parrello 1.16 trace => ["2", "tracing level; use a minus to prevent tracing to standard output"],
110 : parrello 1.1 phone => ["", "phone number (international format) to call when load finishes"],
111 : parrello 1.16 outFile => ["$FIG_Config::nmpdr_base/next/html/includes/diff.inc", "output file for the difference report"],
112 : parrello 1.23 orgFile => ["$FIG_Config::nmpdr_base/next/html/includes/genomes.inc", "output file for the genome report"],
113 : parrello 1.1 },
114 :     "",
115 :     @ARGV);
116 :     # Set a variable to contain return type information.
117 :     my $rtype;
118 :     # Insure we catch errors.
119 :     eval {
120 : parrello 1.16 # Get a CGI object for building the output. We pass it the options hash so
121 :     # the formatting subroutines have access to it. Also, we want it to know
122 :     # we're not a real web script.
123 :     my $cgi = CGI->new($options);
124 :     # Start accumulating HTML data.
125 :     my @html = ();
126 :     # Open the output file. We do this early in case there's a problem.
127 :     my $outFileName = $options->{outFile};
128 :     Trace("Opening output file $outFileName.") if T(2);
129 :     Open(\*OUTPUT, ">$outFileName");
130 :     # Get a nice-looking version name and make it into a title.
131 :     my $version = uc $FIG_Config::nmpdr_version;
132 :     $version =~ tr/_/ /;
133 : parrello 1.23 push @html, $cgi->h4({align => "center"}, "Difference Report for $version");
134 : parrello 1.16 # Start the table.
135 :     push @html, $cgi->start_table({align => "center", border => "2"});
136 : parrello 1.11 # Get the group file.
137 :     my $groupFileName = $options->{groupFile};
138 :     Trace("Reading group file $groupFileName.") if T(2);
139 : parrello 1.2 Trace("Processing genomes.") if T(2);
140 : parrello 1.8 # Get the current SEED.
141 :     my $fig = FIG->new();
142 : parrello 1.1 # Get the old Sprout.
143 : parrello 1.22 my $oldSprout = SFXlate->old_sprout_only();
144 : parrello 1.1 # Get its genomes in alphabetical order.
145 : parrello 1.8 my @oldGenomes = GetGenomes($oldSprout);
146 : parrello 1.1 # Get the new Sprout.
147 :     my $newSprout = SFXlate->new_sprout_only();
148 :     # Get its genomes in alphabetical order.
149 : parrello 1.8 my @newGenomes = GetGenomes($newSprout);
150 : parrello 1.7 # Compare the two genomes lists.
151 : parrello 1.1 my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes);
152 : parrello 1.24 # Get the super-group data.
153 :     my %superTable = $newSprout->CheckGroupFile();
154 :     # Create a list for the new genomes that includes BBH and feature counts. We'll flip this
155 :     # lists so that the genome names are first and the IDs second.
156 :     my @insertedGenomeList = ();
157 : parrello 1.7 for my $insertedGenome (@{$insertedGenomes}) {
158 :     my $genomeID = $insertedGenome->[0];
159 : parrello 1.20 # For a new genome, display the feature and BBH counts.
160 : parrello 1.7 my $count = $newSprout->GetCount(['HasFeature'], "HasFeature(from-link) = ?",
161 :     [$genomeID]);
162 : parrello 1.24 my $suffix = ($count == 1 ? "one feature" : "$count features");
163 : parrello 1.20 my $bbhCount = FIGRules::BatchBBHs("fig|$genomeID.%", 1e-10);
164 :     $suffix .= "; " . ($bbhCount == 1 ? "one BBH" : "$bbhCount BBHs");
165 : parrello 1.24 push @insertedGenomeList, [$insertedGenome->[1], "$genomeID ($suffix)"];
166 : parrello 1.7 }
167 : parrello 1.24 # Create a list for the deleted genomes that contains information about SEED status.
168 :     # This list is flipped, too.
169 :     my @deletedGenomeList = ();
170 : parrello 1.8 for my $deletedGenome (@{$deletedGenomes}) {
171 :     my $genomeID = $deletedGenome->[0];
172 : parrello 1.24 my $suffix = "";
173 : parrello 1.8 if ($fig->is_genome($genomeID)) {
174 : parrello 1.24 # Here the deleted genome is still in the SEED.
175 : parrello 1.8 my $complete = ($fig->is_complete($genomeID) ? "complete" : "incomplete");
176 : parrello 1.24 $suffix = " (still in SEED, $complete)";
177 :     } else {
178 :     # It's not in the SEED. See if it has been replaced.
179 :     my ($genus, $species, $strain) = $oldSprout->GetGenomeNameData($genomeID);
180 :     my @genomeIDs = $newSprout->GetGenomeByNameData($genus, $species, $strain);
181 :     if (scalar @genomeIDs) {
182 :     $suffix = " (replaced)";
183 :     }
184 : parrello 1.8 }
185 : parrello 1.24 push @deletedGenomeList, [$deletedGenome->[1], "$genomeID$suffix"];
186 : parrello 1.8 }
187 : parrello 1.1 # Display the lists.
188 : parrello 1.24 push @html, ShowLists($cgi, 'New Genomes' => \@insertedGenomeList,
189 :     'Deleted Genomes' => \@deletedGenomeList);
190 : parrello 1.8 # Now the groups.
191 :     Trace("Comparing groups.") if T(2);
192 :     my %oldGroups = $oldSprout->GetGroups();
193 :     my %newGroups = $newSprout->GetGroups();
194 :     # Loop through the new groups.
195 :     for my $newGroup (sort keys %newGroups) {
196 :     Trace("Processing group $newGroup.") if T(3);
197 :     # Find out if this group is new to this version.
198 :     if (! exists $oldGroups{$newGroup}) {
199 :     # Construct a list of this group's genes.
200 :     my @groupGenomes = NameGenomes($newSprout, $newGroups{$newGroup});
201 : parrello 1.16 push @html, ShowLists($cgi, "Genomes in new group $newGroup" => \@groupGenomes);
202 : parrello 1.8 } else {
203 : parrello 1.21 # Here the group is in both versions. Fix the lists and compare them. Note that we'll be comparing
204 : parrello 1.24 # on the genome ID, which will become the second list element after the call to NameGenomes.
205 : parrello 1.21 my @newGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($newSprout, $newGroups{$newGroup});
206 :     my @oldGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($oldSprout, $oldGroups{$newGroup});
207 : parrello 1.10 Trace("Comparing lists for $newGroup.") if T(4);
208 : parrello 1.21 my ($insertedGroupGenomes, $deletedGroupGenomes) = Tracer::CompareLists(\@newGroupList, \@oldGroupList, 1);
209 : parrello 1.10 Trace("Comparison complete.") if T(4);
210 : parrello 1.16 # Delete the old group data. When we're done, this means the hash
211 :     # will contain only the deleted groups.
212 : parrello 1.8 delete $oldGroups{$newGroup};
213 :     # Show the lists. Empty lists will not be shown.
214 : parrello 1.10 Trace("Displaying group lists.") if T(4);
215 : parrello 1.16 push @html, ShowLists($cgi, "Genomes new to $newGroup" => $insertedGroupGenomes,
216 :     "Genomes no longer in $newGroup" => $deletedGroupGenomes);
217 : parrello 1.8 }
218 :     }
219 : parrello 1.10 Trace("Processing deleted groups.") if T(4);
220 : parrello 1.8 # Now list the deleted groups.
221 :     for my $oldGroup (sort keys %oldGroups) {
222 :     Trace("Processing deleted group $oldGroup.") if T(3);
223 :     my @groupGenomes = NameGenomes($oldSprout, $oldGroups{$oldGroup});
224 : parrello 1.16 push @html, ShowLists($cgi, "Genomes in deleted group $oldGroup" => \@groupGenomes);
225 : parrello 1.8 }
226 : parrello 1.1 # Next, we get the subsystems.
227 : parrello 1.2 Trace("Processing subsystems.") if T(2);
228 : parrello 1.1 my @oldSubsystems = GetSubsystems($oldSprout);
229 :     my @newSubsystems = GetSubsystems($newSprout);
230 :     # Compare and display the subsystem lists.
231 :     my ($insertedSubs, $deletedSubs) = Tracer::CompareLists(\@newSubsystems, \@oldSubsystems);
232 : parrello 1.8 # Check the deleted subsystems to see if they're in SEED.
233 :     if (scalar @{$deletedSubs} > 0) {
234 :     my %subChecker = map { $_ => 1 } $fig->all_subsystems();
235 :     for my $deletedSub (@{$deletedSubs}) {
236 :     my $subID = $deletedSub->[0];
237 :     if ($subChecker{$subID}) {
238 :     my $trusted = ($fig->usable_subsystem($subID) ? "usable" : "not usable");
239 : parrello 1.9 $deletedSub->[1] .= " (still in SEED, $trusted)";
240 : parrello 1.8 }
241 :     }
242 :     }
243 : parrello 1.16 push @html, ShowLists($cgi, 'New Subsystems' => $insertedSubs,
244 :     'Deleted Subsystems' => $deletedSubs);
245 : parrello 1.23 # Print what we've done so far.
246 :     FlushData(\*OUTPUT, \@html);
247 :     # Now we need to process some statistics that require looping through all the
248 :     # features in the new sprout. While we're at it, we'll collect the BBH and
249 :     # coupling counts.
250 :     my $bbhCount = 0;
251 :     my $couplingCount = 0;
252 : parrello 1.24 # We'll accumulate a report of genomes with missing BBHs in here.
253 :     my @bbhMissingGenomes = ();
254 : parrello 1.23 # One of the reports is only for genomes common to both sprouts. To help us
255 :     # make this determination, we get a hash of the inserted genomes.
256 :     my %skipGenomes = map { $_->[0] => 1 } @{$insertedGenomes};
257 :     # Open the organism report file.
258 :     Open(\*ORGOUT, ">$options->{orgFile}");
259 :     # Start the table.
260 :     my @orgHtml = ();
261 : parrello 1.24 push @orgHtml, $cgi->h4("Genome Report for $version");
262 : parrello 1.23 push @orgHtml, $cgi->start_table({ border => 2, align => 'center'});
263 :     push @orgHtml, $cgi->Tr($cgi->th("Genome"),
264 :     $cgi->th({align => 'right'}, ["Size (bp)", "Feats", "Contigs", "Subs",
265 :     "F In SS", "PEGs", "RNAs", "PPs", "New", "Del"]));
266 :     FlushData(\*ORGOUT, \@orgHtml);
267 : parrello 1.24 # Now we start the loop. Note that "newGenomes" means all the genomes in the new Sprout,
268 :     # not the list of genomes that are new!
269 : parrello 1.23 for my $genomeData (@newGenomes) {
270 :     # Get this genome's ID and name.
271 :     my $genomeID = $genomeData->[0];
272 :     # Create a title for it.
273 :     my $genomeTitle = "$genomeID: $genomeData->[1]";
274 :     # Compute its size.
275 :     my $genomeSize = $newSprout->GenomeLength($genomeID);
276 : parrello 1.24 # Get a list of the genomes in the new Sprout that are not this one.
277 : parrello 1.23 my @otherGenomes = grep { $_ ne $genomeID } map { $_->[0] } @newGenomes;
278 :     Trace("Computing BBH matrix for $genomeID.") if T(3);
279 : parrello 1.24 # Get the bbh matrix going from the current genome to the others.
280 : parrello 1.23 my %matrix = $newSprout->BBHMatrix($genomeID, 1e-20, @otherGenomes);
281 :     # Set up the subsystem hash. This will be used to track which subsystems are used by
282 :     # the genome's features.
283 :     my %subHash = ();
284 :     # Set up the contig hash. This will be used to track which contigs are used by the
285 :     # genome's features.
286 :     my %contigHash = ();
287 :     # Set up a counter hash for feature types.
288 :     my %counters = (peg => 0, in_ss => 0, rna => 0, pp => 0, total => 0);
289 :     # We'll store information about the genome's features (ID and functional role) in here.
290 :     my @newFeatures = ();
291 : parrello 1.24 # Finally, we'll use this flag to warn us if there are no BBHs.
292 :     my $bbhsFound = 0;
293 : parrello 1.23 # Loop through the genome's features. The order is important here, because we need
294 :     # to match the order used by "GetFeatures" for the feature difference comparison.
295 :     Trace("Processing feature statistics for $genomeID.") if T(3);
296 :     my $fquery = $newSprout->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
297 :     [$genomeID]);
298 :     # Loop through the features, updating the counts.
299 :     while (my $feature = $fquery->Fetch()) {
300 :     # Update the total feature count.
301 :     $counters{total}++;
302 :     Trace("$counters{total} features processed for $genomeID.") if T(3) && ($counters{total} % 500 == 0);
303 :     # Get the feature ID and role.
304 :     my $fid = $feature->PrimaryValue('Feature(id)');
305 :     push @newFeatures, [$fid, $feature->PrimaryValue('Feature(assignment)')];
306 :     # Check to see if we have BBH data.
307 :     if (exists $matrix{$fid}) {
308 : parrello 1.24 my $fidBbhCount = scalar keys %{$matrix{$fid}};
309 :     if ($fidBbhCount > 0) {
310 :     # Denote that this feature has BBHs.
311 :     $bbhsFound = 1;
312 :     # Add them to the total BBH count.
313 :     $bbhCount += $fidBbhCount;
314 :     }
315 : parrello 1.23 }
316 :     # Ask for couplings.
317 :     my %coupleHash = $newSprout->CoupledFeatures($fid);
318 :     $couplingCount += keys %coupleHash;
319 :     # See if this feature is in a subsystem.
320 :     my %subs = $newSprout->SubsystemsOf($fid);
321 :     if (keys %subs) {
322 :     $counters{in_ss}++;
323 :     for my $sub (keys %subs) {
324 :     $subHash{$sub} = 1;
325 :     }
326 :     }
327 :     # Increment the feature type counter.
328 :     $counters{$feature->PrimaryValue('Feature(feature-type)')}++;
329 :     # Insure we've tracked this feature's contigs.
330 :     my @locations = split /\s*,\s*/, $feature->PrimaryValue('Feature(location-string)');
331 :     for my $loc (@locations) {
332 :     my $locObject = BasicLocation->new($loc);
333 :     $contigHash{$locObject->Contig} = 1;
334 :     }
335 :     }
336 :     Trace("Feature data compiled for $genomeID.") if T(3);
337 :     # The last thing we need to do is compute the number of features added or deleted.
338 :     # This goes in the genome report, but it's only meaningful for common genomes.
339 :     my ($addCount, $delCount) = ("","");
340 :     if (! $skipGenomes{$genomeID}) {
341 :     # Get the old features.
342 :     my @oldFeatures = GetFeatures($oldSprout, $genomeID);
343 :     Trace("Comparing features for $genomeID.") if T(3);
344 :     # Compare the lists.
345 :     my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);
346 :     $addCount = scalar(@{$insertedFeatures});
347 :     $delCount = scalar(@{$deletedFeatures});
348 :     }
349 : parrello 1.24 # Check to see if this genome is missing its BBHs.
350 :     if (! $bbhsFound) {
351 :     # It is, so add a line for it to the missing-BBH list.
352 :     push @bbhMissingGenomes, ShowDatum($cgi, $genomeData->[1], $genomeID);
353 :     }
354 : parrello 1.23 push @orgHtml, $cgi->Tr($cgi->td($genomeTitle),
355 :     $cgi->td({align => 'right'}, [$genomeSize, $counters{total}, scalar(keys %contigHash),
356 :     scalar(keys %subHash), $counters{in_ss}, $counters{peg},
357 :     $counters{rna}, $counters{pp}, $addCount, $delCount]));
358 :     FlushData(\*ORGOUT, \@orgHtml);
359 :     }
360 :     # Close the table for the genome report.
361 :     push @orgHtml, $cgi->end_table();
362 :     FlushData(\*ORGOUT, \@orgHtml);
363 :     close ORGOUT;
364 : parrello 1.24 # Check for a missing-BBH report.
365 :     if (scalar @bbhMissingGenomes) {
366 :     # There is a report, so put it into the output stream.
367 :     push @html, ShowTitle($cgi, "Genomes without BBHs");
368 :     push @html, @bbhMissingGenomes;
369 :     }
370 :     # Flush the genome feature comparison data and the missing-BBH report (if any).
371 : parrello 1.23 FlushData(\*OUTPUT, \@html);
372 : parrello 1.11 # Next, we show some basic counts.
373 : parrello 1.23 Trace("Displaying counts.") if T(3);
374 : parrello 1.16 push @html, ShowTitle($cgi, "Statistics for old Sprout");
375 :     push @html, ShowCounts($cgi, $oldSprout);
376 :     push @html, ShowTitle($cgi, "Statistics for new Sprout");
377 :     push @html, ShowCounts($cgi, $newSprout);
378 : parrello 1.23 push @html, ShowDatum($cgi, BBHs => $bbhCount);
379 :     push @html, ShowDatum($cgi, "Functional Couplings", $couplingCount);
380 :     FlushData(\*OUTPUT, \@html);
381 : parrello 1.11 # Now we show the genomes that are not in groups but could be. First, we convert
382 :     # our group hash from the new Sprout into the form used on the web site.
383 :     Trace("Examining possible missing genomes in groups.") if T(2);
384 : parrello 1.24 my %fixedGroups = $newSprout->Fix(%newGroups);
385 :     for my $group (sort keys %superTable) {
386 : parrello 1.11 Trace("Checking group $group.");
387 : parrello 1.25 # Loop through this group's genus/species pairs creating filters
388 :     # for a genome query.
389 :     my @filters = ();
390 :     my @filterParms = ();
391 :     for my $genusSpecies (@{$superTable{$group}->{content}}) {
392 :     my ($genus, $species) = @{$genusSpecies};
393 :     # Filter on genus.
394 :     my $filter = 'Genome(genus) = ?';
395 :     push @filterParms, $genus;
396 :     # If necessary, filter on species.
397 :     if ($species) {
398 :     $filter .= ' AND Genome(species) = ?';
399 :     push @filterParms, $species;
400 :     }
401 :     # Add this filter to the list.
402 :     push @filters, "($filter)";
403 :     }
404 :     # Get all the genomes that should be in the super-group.
405 :     my @possibles = $newSprout->GetFlat(['Genome'], join(" OR ", @filters),
406 :     \@filterParms, 'Genome(id)');
407 : parrello 1.11 # Get a hash of the genomes already in it.
408 :     my %inGroup = map { $_ => 1 } @{$fixedGroups{$group}};
409 :     # Get the possibles that aren't in the group and add identifying information.
410 : parrello 1.12 my @leftOut = NameGenomes($newSprout, [ grep { ! exists $inGroup{$_} } @possibles ]);
411 : parrello 1.11 # If anything survived, show the list.
412 :     if (@leftOut) {
413 : parrello 1.16 push @html, ShowLists($cgi, "Candidates for $group" => \@leftOut);
414 : parrello 1.11 }
415 :     }
416 : parrello 1.23 FlushData(\*OUTPUT, \@html);
417 : parrello 1.16 # Close the table.
418 :     push @html, $cgi->end_table();
419 : parrello 1.23 # Flush the last of the HTML.
420 :     FlushData(\*OUTPUT, \@html);
421 : parrello 1.16 # Close the output file.
422 :     close OUTPUT;
423 :     Trace("Analysis complete.") if T(2);
424 : parrello 1.1 };
425 :     if ($@) {
426 :     Trace("Script failed with error: $@") if T(0);
427 :     $rtype = "error";
428 :     } else {
429 :     Trace("Script complete.") if T(2);
430 :     $rtype = "no error";
431 :     }
432 :     if ($options->{phone}) {
433 : parrello 1.11 my $msgID = Tracer::SendSMS($options->{phone}, "New Stuff Checker terminated with $rtype.");
434 : parrello 1.1 if ($msgID) {
435 :     Trace("Phone message sent with ID $msgID.") if T(2);
436 :     } else {
437 :     Trace("Phone message not sent.") if T(2);
438 :     }
439 :     }
440 :    
441 : parrello 1.23 =head3 FlushData
442 :    
443 : parrello 1.24 FlushData($handle, \@lines);
444 : parrello 1.23
445 :     Write the specified lines to the output file and clear them out of the list. This
446 :     method is called periodically so that even if something goes wrong we can still
447 : parrello 1.24 see the data accumulating in the output file. The key aspect here is that we
448 :     put new-line characters after each line written and show something in the trace
449 :     log.
450 : parrello 1.23
451 :     =over 4
452 :    
453 :     =item handle
454 :    
455 :     Output handle to which the lines should be written.
456 :    
457 :     =item lines
458 :    
459 :     Reference to a list of output lines. The output lines will be written to the output
460 :     handle and then removed from the list.
461 :    
462 :     =back
463 :    
464 :     =cut
465 :    
466 :     sub FlushData {
467 :     # Get the parameters.
468 :     my ($handle, $lines) = @_;
469 :     Trace("Flushing " . scalar(@{$lines}) . " lines to output file.") if T(3);
470 :     # Write the lines.
471 :     print $handle join("\n", @{$lines});
472 :     # Write a terminating new-line.
473 :     print $handle "\n";
474 :     # Clear the list.
475 :     splice @{$lines};
476 :     }
477 :    
478 : parrello 1.8 =head3 GetGenomes
479 : parrello 1.1
480 : parrello 1.24 my @geneList = GetGenomes($sprout);
481 : parrello 1.1
482 :     Return a list of the genomes in the specified Sprout instance. The genomes
483 :     are returned in alphabetical order by genome ID.
484 :    
485 :     =over 4
486 :    
487 :     =item sprout
488 :    
489 :     Sprout instance whose gene list is desired.
490 :    
491 :     =item RETURN
492 :    
493 :     Returns a list of two-tuples. The first element in each tuple is the genome ID,
494 :     and the second is the genome name (genus, species, strain).
495 :    
496 :     =back
497 :    
498 :     =cut
499 :    
500 : parrello 1.8 sub GetGenomes {
501 : parrello 1.1 # Get the parameters.
502 :     my ($sprout) = @_;
503 :     # Get the desired data.
504 :     my @genomes = $sprout->GetAll(['Genome'], "ORDER BY Genome(id)", [], ['Genome(id)',
505 :     'Genome(genus)',
506 :     'Genome(species)',
507 :     'Genome(unique-characterization)']);
508 :     # Create the genome names from the three pieces of the name.
509 :     my @retVal = map { [$_->[0], join(" ", @{$_}[1..3])] } @genomes;
510 :     # Return the result.
511 :     return @retVal;
512 :     }
513 :    
514 : parrello 1.8 =head3 NameGenomes
515 :    
516 : parrello 1.24 my @newList = NameGenomes($sprout, \@genomes);
517 : parrello 1.8
518 :     Convert a list of genome IDs to a list of genome IDs with names.
519 :    
520 :     =over 4
521 :    
522 :     =item sprout
523 :    
524 :     The relevant sprout instance.
525 :    
526 :     =item genomes
527 :    
528 :     Reference to a list of genome IDs
529 :    
530 :     =item RETURN
531 :    
532 :     Returns a list of 2-tuples, each tuple consisting of a genome ID followed by a
533 :     genome name.
534 :    
535 :     =back
536 :    
537 :     =cut
538 :    
539 :     sub NameGenomes {
540 :     # Get the parameters.
541 :     my ($sprout, $genomes) = @_;
542 :     # Attach the names.
543 : parrello 1.16 my @retVal = map { [$sprout->GenusSpecies($_), $_ ] } @{$genomes};
544 : parrello 1.8 # Return the result.
545 :     return @retVal;
546 :     }
547 :    
548 : parrello 1.1 =head3 GetSubsystems
549 :    
550 : parrello 1.24 my @subsystems = GetSubsystems($sprout);
551 : parrello 1.1
552 :     Get a list of the subsystems in the specified Sprout instance.
553 :    
554 :     =over 4
555 :    
556 :     =item sprout
557 :    
558 :     Sprout instance whose subsystems are desired.
559 :    
560 :     =item RETURN
561 :    
562 :     Returns a list of 2-tuples, each consisting of the subsystem name followed by
563 :     the name of the curator.
564 :    
565 :     =back
566 :    
567 :     =cut
568 :    
569 :     sub GetSubsystems {
570 :     # Get the parameters.
571 :     my ($sprout) = @_;
572 :     # Declare the return variable.
573 :     my @retVal = $sprout->GetAll(['Subsystem'], "ORDER BY Subsystem(id)",
574 :     [], ['Subsystem(id)', 'Subsystem(curator)']);
575 :     # Return the result.
576 :     return @retVal;
577 :     }
578 :    
579 : parrello 1.11 =head3 GetProperties
580 :    
581 : parrello 1.24 my @propertyList = GetProperties($sprout);
582 : parrello 1.11
583 :     Return a list of properties. Each element in the list will be a 2-tuple containing
584 :     the property name and value in the first column and its ID in the second column.
585 :    
586 :     =over 4
587 :    
588 :     =item sprout
589 :    
590 :     Sprout instance to be used to retrieve the properties.
591 :    
592 :     =item RETURN
593 :    
594 :     Returns a list of 2-tuples. The first element in each 2-tuple will be a string
595 :     in the form of an assignment of the property value to the property name. The second
596 :     element will be the number of features possessing the property. The list will be
597 :     sorted in ascending alphabetical order.
598 :    
599 :     =back
600 :    
601 :     =cut
602 :    
603 :     sub GetProperties {
604 :     # Get the parameters.
605 :     my ($sprout) = @_;
606 :     # Get the properties.
607 :     my @props = $sprout->GetAll(['Property'],
608 :     "ORDER BY Property(property-name), Property(property-value)", [],
609 :     ['Property(property-name)', 'Property(property-value)', 'Property(id)']);
610 :     # Combine the property names and values and replace each property ID by a feature count.
611 :     my @retVal;
612 :     for my $propItem (@props) {
613 : parrello 1.19 # Split up the value on punctuation boundaries for readability.
614 :     my $propValue = $propItem->[1];
615 :     $propValue =~ s/::/ :: /g;
616 :     $propValue =~ s/([,;])(\S)/$1 $2/g;
617 :     my $label = $propItem->[0] . " = " . $propValue;
618 : parrello 1.11 my $count = $sprout->GetCount(['Feature', 'HasProperty'], "HasProperty(to-link) = ?",
619 :     [$propItem->[2]]);
620 :     push @retVal, [$label, $count];
621 :     }
622 :     # Return the result.
623 :     return @retVal;
624 :     }
625 :    
626 : parrello 1.1 =head3 GetFeatures
627 :    
628 : parrello 1.24 my @features = GetFeatures($sprout, $genomeID);
629 : parrello 1.1
630 :     Return the features of the specified genome in the specified Sprout instance.
631 :    
632 :     =over 4
633 :    
634 :     =item sprout
635 :    
636 :     Sprout instance to use to get the features.
637 :    
638 :     =item genomeID
639 :    
640 :     ID of the genome in question.
641 :    
642 :     =item RETURN
643 :    
644 :     Returns a list of 2-tuples, the first element being the feature ID and the second its
645 :     functional assignment (if any).
646 :    
647 :     =back
648 :    
649 :     =cut
650 :    
651 :     sub GetFeatures {
652 :     # Get the parameters.
653 :     my ($sprout, $genomeID) = @_;
654 :     # Get a list of the feature IDs and map them to their functional assignments.
655 :     my @retVal = map { [$_, $sprout->FunctionOf($_)] } $sprout->GetFlat(['HasFeature'],
656 :     "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
657 :     [$genomeID], 'HasFeature(to-link)');
658 :     # Return the result.
659 :     return @retVal;
660 :     }
661 :    
662 :     =head3 ShowLists
663 :    
664 : parrello 1.24 my @htmlLines = ShowLists($cgi, %lists);
665 : parrello 1.1
666 : parrello 1.16 Display a set of lists. Each list should consist of 2-tuples, and the list
667 :     entries will be displayed as 2-element table rows with a header row.
668 : parrello 1.1
669 :     =over 4
670 :    
671 : parrello 1.16 =item cgi
672 : parrello 1.2
673 : parrello 1.16 A CGI query object containing the options for this program. It is also used to format
674 : parrello 1.24 HTML.
675 : parrello 1.2
676 : parrello 1.1 =item lists
677 :    
678 :     A hash mapping list names to list references.
679 :    
680 : parrello 1.16 =item RETURN
681 :    
682 :     Returns a list of HTML lines displaying the list in tabular form.
683 :    
684 : parrello 1.15 =back
685 :    
686 : parrello 1.1 =cut
687 :    
688 :     sub ShowLists {
689 :     # Get the parameters.
690 : parrello 1.16 my $cgi = shift @_;
691 : parrello 1.1 my %lists = @_;
692 : parrello 1.16 # Declare the return variable. The HTML lines will be accumulated
693 :     # in here and then joined with new-lines.
694 :     my @retVal = ();
695 : parrello 1.1 # Loop through the lists in alphabetical order by list name.
696 : parrello 1.6 for my $listName (sort keys %lists) {
697 : parrello 1.1 # Get the list itself.
698 :     my $list = $lists{$listName};
699 :     # Get the number of list items.
700 :     my $listSize = scalar @{$list};
701 : parrello 1.6 # Only proceed if the list is nonempty.
702 :     if ($listSize > 0) {
703 : parrello 1.16 my $header = ComputeHeader($listName, $listSize);
704 : parrello 1.6 Trace($header) if T(3);
705 : parrello 1.16 # Display the header line as a header.
706 :     push @retVal, ShowTitle($cgi, $header);
707 : parrello 1.17 # Now display the list as table rows. Note we convert underbars to spaces
708 :     # in the name row to make the table easier to fit into narrow places.
709 : parrello 1.16 for my $entry (@{$list}) {
710 :     my ($name, $data) = @{$entry};
711 : parrello 1.18 $name =~ tr/_/ /;
712 : parrello 1.23 push @retVal, ShowDatum($cgi, $name => $data);
713 : parrello 1.1 }
714 :     }
715 :     }
716 : parrello 1.16 # Return the list of HTML lines.
717 :     return @retVal;
718 : parrello 1.1 }
719 :    
720 : parrello 1.16 =head3 ComputeHeader
721 : parrello 1.7
722 : parrello 1.24 my $header = ComputeHeader($name, $count);
723 : parrello 1.7
724 :     Return a list header for a list of the specified length.
725 :    
726 :     =over 4
727 :    
728 :     =item name
729 :    
730 :     Name of the list.
731 :    
732 :     =item count
733 :    
734 :     Number of entries in the list.
735 :    
736 :     =item RETURN
737 :    
738 :     Returns a list header that shows the name of the list and the number of entries.
739 :    
740 :     =back
741 :    
742 :     =cut
743 :    
744 : parrello 1.16 sub ComputeHeader {
745 : parrello 1.7 # Get the parameters.
746 :     my ($name, $count) = @_;
747 :     # Declare the return variable.
748 : parrello 1.15 my $retVal;
749 : parrello 1.7 if ($count == 0) {
750 : parrello 1.16 $retVal = "$name: none";
751 : parrello 1.7 } elsif ($count == 1) {
752 : parrello 1.16 $retVal = "$name: one";
753 : parrello 1.7 } else {
754 : parrello 1.16 $retVal = "$name: $count";
755 : parrello 1.7 }
756 :     # Return the result.
757 :     return $retVal;
758 :     }
759 :    
760 : parrello 1.11 =head3 ShowCounts
761 :    
762 : parrello 1.24 ShowCounts($sprout);
763 : parrello 1.11
764 :     Display general counts for the specified sprout instance. These counts are
765 :     used in progress reports.
766 :    
767 :     =over 4
768 :    
769 : parrello 1.16 =item cgi
770 :    
771 :     CGI query object used to format the output.
772 :    
773 : parrello 1.11 =item sprout
774 :    
775 :     Sprout instance for which counts are to be produced.
776 :    
777 : parrello 1.16 =item RETURN
778 :    
779 :     Returns a list of HTML lines with the counts arranged in table rows.
780 :    
781 : parrello 1.11 =back
782 :    
783 :     =cut
784 :    
785 :     sub ShowCounts {
786 :     # Get the parameters.
787 : parrello 1.16 my ($cgi, $sprout) = @_;
788 : parrello 1.11 # Count genomes and subsystems.
789 :     my $genomes = $sprout->GetCount(['Genome']);
790 :     my $subsystems = $sprout->GetCount(['Subsystem']);
791 : parrello 1.23 # Count roles and external functional assignments.
792 : parrello 1.11 my $roles = $sprout->GetCount(['OccursInSubsystem']);
793 :     my $funcs = $sprout->GetCount(['ExternalAliasFunc']);
794 : parrello 1.16 # Count features.
795 : parrello 1.11 my $features = $sprout->GetCount(['Feature']);
796 :     # Display the counts.
797 : parrello 1.16 my @retVal = ();
798 : parrello 1.23 push @retVal, ShowDatum($cgi, Genomes => $genomes);
799 :     push @retVal, ShowDatum($cgi, Subsystems => $subsystems);
800 :     push @retVal, ShowDatum($cgi, Roles => $roles);
801 :     push @retVal, ShowDatum($cgi, 'External function assignments', $funcs);
802 :     push @retVal, ShowDatum($cgi, Features => $features);
803 : parrello 1.16 # Return the html.
804 :     return @retVal;
805 :     }
806 :    
807 : parrello 1.23 =head3 ShowDatum
808 :    
809 : parrello 1.24 my $htmlText = ShowDatum($cgi, $label, $value);
810 : parrello 1.23
811 :     Return a table row displaying the specified label and value.
812 :    
813 :     =over 4
814 :    
815 :     =item cgi
816 :    
817 :     CGI query object used to generate the HTML text.
818 :    
819 :     =item label
820 :    
821 :     Label to appear in the left cell of the table row.
822 :    
823 :     =item value
824 :    
825 :     Value to appear in the right cell of the table row.
826 :    
827 :     =item RETURN
828 :    
829 :     Returns the HTML for a single table row with the last cell right-aligned.
830 :    
831 :     =back
832 :    
833 :     =cut
834 :    
835 :     sub ShowDatum {
836 :     # Get the parameters.
837 :     my ($cgi, $label, $value) = @_;
838 :     # Create the table row.
839 :     my $retVal = $cgi->Tr($cgi->td($label), $cgi->td({align => 'right'}, $value));
840 :     # Return it.
841 :     return $retVal;
842 :     }
843 :    
844 : parrello 1.16 =head3 ShowTitle
845 :    
846 : parrello 1.24 my $html = ShowTitle($cgi, $title);
847 : parrello 1.16
848 :     Display a title line. This will be a merged table row with bolded text.
849 :    
850 :     =over 4
851 :    
852 :     =item cgi
853 :    
854 :     CGI query object used to generate HTML output.
855 :    
856 :     =item RETURN
857 :    
858 :     Returns the HTML text.
859 :    
860 :     =back
861 :    
862 :     =cut
863 :    
864 :     sub ShowTitle {
865 :     # Get the parameters.
866 :     my ($cgi, $title) = @_;
867 :     # Declare the return variable.
868 :     my $retVal = $cgi->Tr($cgi->th({colspan => 2, align => "center"}, $title));
869 :     # Return the result.
870 :     return $retVal;
871 : parrello 1.11 }
872 : parrello 1.16
873 : parrello 1.25 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3