[Bio] / Sprout / NewStuffCheck.pl Repository:
ViewVC logotype

Annotation of /Sprout/NewStuffCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.23 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 New Stuff Checker
4 :    
5 :     This script compares the genomes, features, and annotations in
6 : parrello 1.16 the old and new sprouts and lists the differences and produces
7 :     a report in HTML. The output is an HTML fragment, not an entire
8 :     web page. This is because we expect it to be included in another
9 :     page.
10 : parrello 1.1
11 : parrello 1.11 The currently-supported command-line options for NewStuffCheck are as follows.
12 : parrello 1.1
13 :     =over 4
14 :    
15 :     =item user
16 :    
17 :     Name suffix to be used for log files. If omitted, the PID is used.
18 :    
19 :     =item trace
20 :    
21 :     Numeric trace level. A higher trace level causes more messages to appear. The
22 :     default trace level is 2. Tracing will be directly to the standard output
23 :     as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
24 :     where I<User> is the value of the B<user> option above.
25 :    
26 :     =item sql
27 :    
28 :     If specified, turns on tracing of SQL activity.
29 :    
30 :     =item background
31 :    
32 :     Save the standard and error output to files. The files will be created
33 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
34 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
35 :     B<user> option above.
36 :    
37 :     =item h
38 :    
39 :     Display this command's parameters and options.
40 :    
41 :     =item phone
42 :    
43 :     Phone number to message when the script is complete.
44 :    
45 : parrello 1.11 =item groupFile
46 :    
47 :     Name of the group file (described below). The default is C<groups.tbl>
48 :     in the Sprout data directory.
49 :    
50 : parrello 1.16 =item outFile
51 :    
52 : parrello 1.19 Output file name. The default is C<html/includes/diff.inc> in the
53 : parrello 1.16 nmpdr C<next> directory.
54 :    
55 : parrello 1.23 =item orgFile
56 :    
57 :     Output file for the genome report. The default is C<html/includes/genomes.inc> in
58 :     the nmpdr C<next> directory.
59 :    
60 : parrello 1.11 =back
61 :    
62 :     =head2 The Group File
63 :    
64 :     A key data file for this process is C<groups.tbl>. This file is kept in the
65 :     Sprout Data directory, and contains the following columns:
66 :    
67 :     =over 4
68 :    
69 :     =item name
70 :    
71 :     Name of the group.
72 :    
73 :     =item page
74 :    
75 :     Name of the group's page on the web site (e.g. C<campy.php> for
76 :     Campylobacter)
77 :    
78 :     =item genus
79 :    
80 :     Genus of the group
81 :    
82 :     =item species
83 :    
84 :     Species of the group, or an empty string if the group is for an entire
85 : parrello 1.13 genus. If the group contains more than one species, the species names
86 :     should be separated by commas.
87 : parrello 1.11
88 : parrello 1.1 =back
89 :    
90 :     =cut
91 :    
92 :     use strict;
93 :     use Tracer;
94 :     use DocUtils;
95 :     use TestUtils;
96 :     use Cwd;
97 :     use File::Copy;
98 :     use File::Path;
99 :     use FIG;
100 :     use SFXlate;
101 :     use Sprout;
102 : parrello 1.16 use CGI;
103 : parrello 1.20 use FIGRules;
104 : parrello 1.1
105 :     # Get the command-line options and parameters.
106 :     my ($options, @parameters) = StandardSetup([qw(Sprout) ],
107 :     {
108 : parrello 1.11 groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"],
109 : parrello 1.16 trace => ["2", "tracing level; use a minus to prevent tracing to standard output"],
110 : parrello 1.1 phone => ["", "phone number (international format) to call when load finishes"],
111 : parrello 1.16 outFile => ["$FIG_Config::nmpdr_base/next/html/includes/diff.inc", "output file for the difference report"],
112 : parrello 1.23 orgFile => ["$FIG_Config::nmpdr_base/next/html/includes/genomes.inc", "output file for the genome report"],
113 : parrello 1.1 },
114 :     "",
115 :     @ARGV);
116 :     # Set a variable to contain return type information.
117 :     my $rtype;
118 :     # Insure we catch errors.
119 :     eval {
120 : parrello 1.16 # Get a CGI object for building the output. We pass it the options hash so
121 :     # the formatting subroutines have access to it. Also, we want it to know
122 :     # we're not a real web script.
123 :     my $cgi = CGI->new($options);
124 :     # Start accumulating HTML data.
125 :     my @html = ();
126 :     # Open the output file. We do this early in case there's a problem.
127 :     my $outFileName = $options->{outFile};
128 :     Trace("Opening output file $outFileName.") if T(2);
129 :     Open(\*OUTPUT, ">$outFileName");
130 :     # Get a nice-looking version name and make it into a title.
131 :     my $version = uc $FIG_Config::nmpdr_version;
132 :     $version =~ tr/_/ /;
133 : parrello 1.23 push @html, $cgi->h4({align => "center"}, "Difference Report for $version");
134 : parrello 1.16 # Start the table.
135 :     push @html, $cgi->start_table({align => "center", border => "2"});
136 : parrello 1.11 # Get the group file.
137 :     my $groupFileName = $options->{groupFile};
138 :     Trace("Reading group file $groupFileName.") if T(2);
139 : parrello 1.16 # We'll put each group's data into a hash, keyed by group
140 : parrello 1.11 # name, each entry being a 3-tuple of page name, genus,
141 :     # and species
142 : parrello 1.13 my %groups = Sprout::ReadGroupFile($groupFileName);
143 : parrello 1.2 Trace("Processing genomes.") if T(2);
144 : parrello 1.8 # Get the current SEED.
145 :     my $fig = FIG->new();
146 : parrello 1.1 # Get the old Sprout.
147 : parrello 1.22 my $oldSprout = SFXlate->old_sprout_only();
148 : parrello 1.1 # Get its genomes in alphabetical order.
149 : parrello 1.8 my @oldGenomes = GetGenomes($oldSprout);
150 : parrello 1.1 # Get the new Sprout.
151 :     my $newSprout = SFXlate->new_sprout_only();
152 :     # Get its genomes in alphabetical order.
153 : parrello 1.8 my @newGenomes = GetGenomes($newSprout);
154 : parrello 1.7 # Compare the two genomes lists.
155 : parrello 1.1 my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes);
156 : parrello 1.7 # Add feature counts to the new genomes.
157 :     for my $insertedGenome (@{$insertedGenomes}) {
158 :     my $genomeID = $insertedGenome->[0];
159 : parrello 1.20 # For a new genome, display the feature and BBH counts.
160 : parrello 1.7 my $count = $newSprout->GetCount(['HasFeature'], "HasFeature(from-link) = ?",
161 :     [$genomeID]);
162 :     my $suffix = ($count == 1 ? " one feature" : "$count features");
163 : parrello 1.20 my $bbhCount = FIGRules::BatchBBHs("fig|$genomeID.%", 1e-10);
164 :     $suffix .= "; " . ($bbhCount == 1 ? "one BBH" : "$bbhCount BBHs");
165 : parrello 1.7 $insertedGenome->[1] .= "($suffix)";
166 :     }
167 : parrello 1.8 # Add information about SEED status to the deleted genomes.
168 :     for my $deletedGenome (@{$deletedGenomes}) {
169 :     my $genomeID = $deletedGenome->[0];
170 :     if ($fig->is_genome($genomeID)) {
171 :     my $complete = ($fig->is_complete($genomeID) ? "complete" : "incomplete");
172 : parrello 1.9 $deletedGenome->[1] .= "(still in SEED, $complete)";
173 : parrello 1.8 }
174 :     }
175 : parrello 1.1 # Display the lists.
176 : parrello 1.16 push @html, ShowLists($cgi, 'New Genomes' => $insertedGenomes,
177 :     'Deleted Genomes' => $deletedGenomes);
178 : parrello 1.8 # Now the groups.
179 :     Trace("Comparing groups.") if T(2);
180 :     my %oldGroups = $oldSprout->GetGroups();
181 :     my %newGroups = $newSprout->GetGroups();
182 :     # Loop through the new groups.
183 :     for my $newGroup (sort keys %newGroups) {
184 :     Trace("Processing group $newGroup.") if T(3);
185 :     # Find out if this group is new to this version.
186 :     if (! exists $oldGroups{$newGroup}) {
187 :     # Construct a list of this group's genes.
188 :     my @groupGenomes = NameGenomes($newSprout, $newGroups{$newGroup});
189 : parrello 1.16 push @html, ShowLists($cgi, "Genomes in new group $newGroup" => \@groupGenomes);
190 : parrello 1.8 } else {
191 : parrello 1.21 # Here the group is in both versions. Fix the lists and compare them. Note that we'll be comparing
192 :     # on the genome ID, which will become the second list element after
193 :     my @newGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($newSprout, $newGroups{$newGroup});
194 :     my @oldGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($oldSprout, $oldGroups{$newGroup});
195 : parrello 1.10 Trace("Comparing lists for $newGroup.") if T(4);
196 : parrello 1.21 my ($insertedGroupGenomes, $deletedGroupGenomes) = Tracer::CompareLists(\@newGroupList, \@oldGroupList, 1);
197 : parrello 1.10 Trace("Comparison complete.") if T(4);
198 : parrello 1.16 # Delete the old group data. When we're done, this means the hash
199 :     # will contain only the deleted groups.
200 : parrello 1.8 delete $oldGroups{$newGroup};
201 :     # Show the lists. Empty lists will not be shown.
202 : parrello 1.10 Trace("Displaying group lists.") if T(4);
203 : parrello 1.16 push @html, ShowLists($cgi, "Genomes new to $newGroup" => $insertedGroupGenomes,
204 :     "Genomes no longer in $newGroup" => $deletedGroupGenomes);
205 : parrello 1.8 }
206 :     }
207 : parrello 1.10 Trace("Processing deleted groups.") if T(4);
208 : parrello 1.8 # Now list the deleted groups.
209 :     for my $oldGroup (sort keys %oldGroups) {
210 :     Trace("Processing deleted group $oldGroup.") if T(3);
211 :     my @groupGenomes = NameGenomes($oldSprout, $oldGroups{$oldGroup});
212 : parrello 1.16 push @html, ShowLists($cgi, "Genomes in deleted group $oldGroup" => \@groupGenomes);
213 : parrello 1.8 }
214 : parrello 1.1 # Next, we get the subsystems.
215 : parrello 1.2 Trace("Processing subsystems.") if T(2);
216 : parrello 1.1 my @oldSubsystems = GetSubsystems($oldSprout);
217 :     my @newSubsystems = GetSubsystems($newSprout);
218 :     # Compare and display the subsystem lists.
219 :     my ($insertedSubs, $deletedSubs) = Tracer::CompareLists(\@newSubsystems, \@oldSubsystems);
220 : parrello 1.8 # Check the deleted subsystems to see if they're in SEED.
221 :     if (scalar @{$deletedSubs} > 0) {
222 :     my %subChecker = map { $_ => 1 } $fig->all_subsystems();
223 :     for my $deletedSub (@{$deletedSubs}) {
224 :     my $subID = $deletedSub->[0];
225 :     if ($subChecker{$subID}) {
226 :     my $trusted = ($fig->usable_subsystem($subID) ? "usable" : "not usable");
227 : parrello 1.9 $deletedSub->[1] .= " (still in SEED, $trusted)";
228 : parrello 1.8 }
229 :     }
230 :     }
231 : parrello 1.16 push @html, ShowLists($cgi, 'New Subsystems' => $insertedSubs,
232 :     'Deleted Subsystems' => $deletedSubs);
233 : parrello 1.23 # Print what we've done so far.
234 :     FlushData(\*OUTPUT, \@html);
235 :     # Now we need to process some statistics that require looping through all the
236 :     # features in the new sprout. While we're at it, we'll collect the BBH and
237 :     # coupling counts.
238 :     my $bbhCount = 0;
239 :     my $couplingCount = 0;
240 :     # One of the reports is only for genomes common to both sprouts. To help us
241 :     # make this determination, we get a hash of the inserted genomes.
242 :     my %skipGenomes = map { $_->[0] => 1 } @{$insertedGenomes};
243 :     # Open the organism report file.
244 :     Open(\*ORGOUT, ">$options->{orgFile}");
245 :     # Start the table.
246 :     my @orgHtml = ();
247 :     push @orgHtml, $cgi->h4("Genome Report for $version");
248 :     push @orgHtml, $cgi->start_table({ border => 2, align => 'center'});
249 :     push @orgHtml, $cgi->Tr($cgi->th("Genome"),
250 :     $cgi->th({align => 'right'}, ["Size (bp)", "Feats", "Contigs", "Subs",
251 :     "F In SS", "PEGs", "RNAs", "PPs", "New", "Del"]));
252 :     FlushData(\*ORGOUT, \@orgHtml);
253 :     # Now we start the loop.
254 :     for my $genomeData (@newGenomes) {
255 :     # Get this genome's ID and name.
256 :     my $genomeID = $genomeData->[0];
257 :     # Create a title for it.
258 :     my $genomeTitle = "$genomeID: $genomeData->[1]";
259 :     # Compute its size.
260 :     my $genomeSize = $newSprout->GenomeLength($genomeID);
261 :     # Get a list of the genomes that are not this one.
262 :     my @otherGenomes = grep { $_ ne $genomeID } map { $_->[0] } @newGenomes;
263 :     Trace("Computing BBH matrix for $genomeID.") if T(3);
264 :     # Get the bbh matrix going from the current gene to the others.
265 :     my %matrix = $newSprout->BBHMatrix($genomeID, 1e-20, @otherGenomes);
266 :     # Set up the subsystem hash. This will be used to track which subsystems are used by
267 :     # the genome's features.
268 :     my %subHash = ();
269 :     # Set up the contig hash. This will be used to track which contigs are used by the
270 :     # genome's features.
271 :     my %contigHash = ();
272 :     # Set up a counter hash for feature types.
273 :     my %counters = (peg => 0, in_ss => 0, rna => 0, pp => 0, total => 0);
274 :     # We'll store information about the genome's features (ID and functional role) in here.
275 :     my @newFeatures = ();
276 :     # Loop through the genome's features. The order is important here, because we need
277 :     # to match the order used by "GetFeatures" for the feature difference comparison.
278 :     Trace("Processing feature statistics for $genomeID.") if T(3);
279 :     my $fquery = $newSprout->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
280 :     [$genomeID]);
281 :     # Loop through the features, updating the counts.
282 :     while (my $feature = $fquery->Fetch()) {
283 :     # Update the total feature count.
284 :     $counters{total}++;
285 :     Trace("$counters{total} features processed for $genomeID.") if T(3) && ($counters{total} % 500 == 0);
286 :     # Get the feature ID and role.
287 :     my $fid = $feature->PrimaryValue('Feature(id)');
288 :     push @newFeatures, [$fid, $feature->PrimaryValue('Feature(assignment)')];
289 :     # Check to see if we have BBH data.
290 :     if (exists $matrix{$fid}) {
291 :     $bbhCount += scalar keys %{$matrix{$fid}};
292 :     }
293 :     # Ask for couplings.
294 :     my %coupleHash = $newSprout->CoupledFeatures($fid);
295 :     $couplingCount += keys %coupleHash;
296 :     # See if this feature is in a subsystem.
297 :     my %subs = $newSprout->SubsystemsOf($fid);
298 :     if (keys %subs) {
299 :     $counters{in_ss}++;
300 :     for my $sub (keys %subs) {
301 :     $subHash{$sub} = 1;
302 :     }
303 :     }
304 :     # Increment the feature type counter.
305 :     $counters{$feature->PrimaryValue('Feature(feature-type)')}++;
306 :     # Insure we've tracked this feature's contigs.
307 :     my @locations = split /\s*,\s*/, $feature->PrimaryValue('Feature(location-string)');
308 :     for my $loc (@locations) {
309 :     my $locObject = BasicLocation->new($loc);
310 :     $contigHash{$locObject->Contig} = 1;
311 :     }
312 :     }
313 :     Trace("Feature data compiled for $genomeID.") if T(3);
314 :     # The last thing we need to do is compute the number of features added or deleted.
315 :     # This goes in the genome report, but it's only meaningful for common genomes.
316 :     my ($addCount, $delCount) = ("","");
317 :     if (! $skipGenomes{$genomeID}) {
318 :     # Get the old features.
319 :     my @oldFeatures = GetFeatures($oldSprout, $genomeID);
320 :     Trace("Comparing features for $genomeID.") if T(3);
321 :     # Compare the lists.
322 :     my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);
323 :     $addCount = scalar(@{$insertedFeatures});
324 :     $delCount = scalar(@{$deletedFeatures});
325 :     }
326 :     push @orgHtml, $cgi->Tr($cgi->td($genomeTitle),
327 :     $cgi->td({align => 'right'}, [$genomeSize, $counters{total}, scalar(keys %contigHash),
328 :     scalar(keys %subHash), $counters{in_ss}, $counters{peg},
329 :     $counters{rna}, $counters{pp}, $addCount, $delCount]));
330 :     FlushData(\*ORGOUT, \@orgHtml);
331 :     }
332 :     # Close the table for the genome report.
333 :     push @orgHtml, $cgi->end_table();
334 :     FlushData(\*ORGOUT, \@orgHtml);
335 :     close ORGOUT;
336 :     # Flush the genome feature comparison data.
337 :     FlushData(\*OUTPUT, \@html);
338 : parrello 1.11 # Next, we show some basic counts.
339 : parrello 1.23 Trace("Displaying counts.") if T(3);
340 : parrello 1.16 push @html, ShowTitle($cgi, "Statistics for old Sprout");
341 :     push @html, ShowCounts($cgi, $oldSprout);
342 :     push @html, ShowTitle($cgi, "Statistics for new Sprout");
343 :     push @html, ShowCounts($cgi, $newSprout);
344 : parrello 1.23 push @html, ShowDatum($cgi, BBHs => $bbhCount);
345 :     push @html, ShowDatum($cgi, "Functional Couplings", $couplingCount);
346 :     FlushData(\*OUTPUT, \@html);
347 : parrello 1.11 # Now we show the genomes that are not in groups but could be. First, we convert
348 :     # our group hash from the new Sprout into the form used on the web site.
349 :     Trace("Examining possible missing genomes in groups.") if T(2);
350 :     my %fixedGroups = Sprout::Fix(%newGroups);
351 :     for my $group (sort keys %groups) {
352 :     Trace("Checking group $group.");
353 :     # Get this group's genus and species.
354 :     my $genus = $groups{$group}->[1];
355 :     my $species = $groups{$group}->[2];
356 :     # Get a hash of the genomes already in it.
357 :     my %inGroup = map { $_ => 1 } @{$fixedGroups{$group}};
358 :     # Get a list of its possible genomes.
359 :     my $filter = 'Genome(genus) = ?';
360 :     my @parms = ($genus);
361 : parrello 1.13 # The species list is tricky because a given group may involve more than
362 :     # one target species. The species names will be comma-separated, and
363 :     # we use some PERL trickiness to generate an OR filter for them.
364 : parrello 1.11 if ($species) {
365 : parrello 1.13 # Get the individual species.
366 :     my @speciesList = split /\s*,\s*/, $species;
367 :     # Create one species filter per species.
368 :     my @filterClauses = map { 'Genome(species) = ?' } @speciesList;
369 :     # OR the filter clauses together to get a real filter.
370 :     $filter .= " AND (" . (join " OR ", @filterClauses) . ")";
371 :     # Add the specieis names to the SQL parameter list.
372 :     push @parms, @speciesList;
373 : parrello 1.11 }
374 :     my @possibles = $newSprout->GetFlat(['Genome'], $filter, \@parms, 'Genome(id)');
375 :     # Get the possibles that aren't in the group and add identifying information.
376 : parrello 1.12 my @leftOut = NameGenomes($newSprout, [ grep { ! exists $inGroup{$_} } @possibles ]);
377 : parrello 1.11 # If anything survived, show the list.
378 :     if (@leftOut) {
379 : parrello 1.16 push @html, ShowLists($cgi, "Candidates for $group" => \@leftOut);
380 : parrello 1.11 }
381 :     }
382 : parrello 1.23 FlushData(\*OUTPUT, \@html);
383 : parrello 1.16 # Close the table.
384 :     push @html, $cgi->end_table();
385 : parrello 1.23 # Flush the last of the HTML.
386 :     FlushData(\*OUTPUT, \@html);
387 : parrello 1.16 # Close the output file.
388 :     close OUTPUT;
389 :     Trace("Analysis complete.") if T(2);
390 : parrello 1.1 };
391 :     if ($@) {
392 :     Trace("Script failed with error: $@") if T(0);
393 :     $rtype = "error";
394 :     } else {
395 :     Trace("Script complete.") if T(2);
396 :     $rtype = "no error";
397 :     }
398 :     if ($options->{phone}) {
399 : parrello 1.11 my $msgID = Tracer::SendSMS($options->{phone}, "New Stuff Checker terminated with $rtype.");
400 : parrello 1.1 if ($msgID) {
401 :     Trace("Phone message sent with ID $msgID.") if T(2);
402 :     } else {
403 :     Trace("Phone message not sent.") if T(2);
404 :     }
405 :     }
406 :    
407 : parrello 1.23 =head3 FlushData
408 :    
409 :     C<< FlushData($handle, \@lines); >>
410 :    
411 :     Write the specified lines to the output file and clear them out of the list. This
412 :     method is called periodically so that even if something goes wrong we can still
413 :     see the data accumulating in the output file.
414 :    
415 :     =over 4
416 :    
417 :     =item handle
418 :    
419 :     Output handle to which the lines should be written.
420 :    
421 :     =item lines
422 :    
423 :     Reference to a list of output lines. The output lines will be written to the output
424 :     handle and then removed from the list.
425 :    
426 :     =back
427 :    
428 :     =cut
429 :    
430 :     sub FlushData {
431 :     # Get the parameters.
432 :     my ($handle, $lines) = @_;
433 :     Trace("Flushing " . scalar(@{$lines}) . " lines to output file.") if T(3);
434 :     # Write the lines.
435 :     print $handle join("\n", @{$lines});
436 :     # Write a terminating new-line.
437 :     print $handle "\n";
438 :     # Clear the list.
439 :     splice @{$lines};
440 :     }
441 :    
442 : parrello 1.8 =head3 GetGenomes
443 : parrello 1.1
444 : parrello 1.8 C<< my @geneList = GetGenomes($sprout); >>
445 : parrello 1.1
446 :     Return a list of the genomes in the specified Sprout instance. The genomes
447 :     are returned in alphabetical order by genome ID.
448 :    
449 :     =over 4
450 :    
451 :     =item sprout
452 :    
453 :     Sprout instance whose gene list is desired.
454 :    
455 :     =item RETURN
456 :    
457 :     Returns a list of two-tuples. The first element in each tuple is the genome ID,
458 :     and the second is the genome name (genus, species, strain).
459 :    
460 :     =back
461 :    
462 :     =cut
463 :    
464 : parrello 1.8 sub GetGenomes {
465 : parrello 1.1 # Get the parameters.
466 :     my ($sprout) = @_;
467 :     # Get the desired data.
468 :     my @genomes = $sprout->GetAll(['Genome'], "ORDER BY Genome(id)", [], ['Genome(id)',
469 :     'Genome(genus)',
470 :     'Genome(species)',
471 :     'Genome(unique-characterization)']);
472 :     # Create the genome names from the three pieces of the name.
473 :     my @retVal = map { [$_->[0], join(" ", @{$_}[1..3])] } @genomes;
474 :     # Return the result.
475 :     return @retVal;
476 :     }
477 :    
478 : parrello 1.8 =head3 NameGenomes
479 :    
480 : parrello 1.11 C<< my @newList = NameGenomes($sprout, \@genomes); >>
481 : parrello 1.8
482 :     Convert a list of genome IDs to a list of genome IDs with names.
483 :    
484 :     =over 4
485 :    
486 :     =item sprout
487 :    
488 :     The relevant sprout instance.
489 :    
490 :     =item genomes
491 :    
492 :     Reference to a list of genome IDs
493 :    
494 :     =item RETURN
495 :    
496 :     Returns a list of 2-tuples, each tuple consisting of a genome ID followed by a
497 :     genome name.
498 :    
499 :     =back
500 :    
501 :     =cut
502 :    
503 :     sub NameGenomes {
504 :     # Get the parameters.
505 :     my ($sprout, $genomes) = @_;
506 :     # Attach the names.
507 : parrello 1.16 my @retVal = map { [$sprout->GenusSpecies($_), $_ ] } @{$genomes};
508 : parrello 1.8 # Return the result.
509 :     return @retVal;
510 :     }
511 :    
512 : parrello 1.1 =head3 GetSubsystems
513 :    
514 :     C<< my @subsystems = GetSubsystems($sprout); >>
515 :    
516 :     Get a list of the subsystems in the specified Sprout instance.
517 :    
518 :     =over 4
519 :    
520 :     =item sprout
521 :    
522 :     Sprout instance whose subsystems are desired.
523 :    
524 :     =item RETURN
525 :    
526 :     Returns a list of 2-tuples, each consisting of the subsystem name followed by
527 :     the name of the curator.
528 :    
529 :     =back
530 :    
531 :     =cut
532 :    
533 :     sub GetSubsystems {
534 :     # Get the parameters.
535 :     my ($sprout) = @_;
536 :     # Declare the return variable.
537 :     my @retVal = $sprout->GetAll(['Subsystem'], "ORDER BY Subsystem(id)",
538 :     [], ['Subsystem(id)', 'Subsystem(curator)']);
539 :     # Return the result.
540 :     return @retVal;
541 :     }
542 :    
543 : parrello 1.11 =head3 GetProperties
544 :    
545 :     C<< my @propertyList = GetProperties($sprout); >>
546 :    
547 :     Return a list of properties. Each element in the list will be a 2-tuple containing
548 :     the property name and value in the first column and its ID in the second column.
549 :    
550 :     =over 4
551 :    
552 :     =item sprout
553 :    
554 :     Sprout instance to be used to retrieve the properties.
555 :    
556 :     =item RETURN
557 :    
558 :     Returns a list of 2-tuples. The first element in each 2-tuple will be a string
559 :     in the form of an assignment of the property value to the property name. The second
560 :     element will be the number of features possessing the property. The list will be
561 :     sorted in ascending alphabetical order.
562 :    
563 :     =back
564 :    
565 :     =cut
566 :    
567 :     sub GetProperties {
568 :     # Get the parameters.
569 :     my ($sprout) = @_;
570 :     # Get the properties.
571 :     my @props = $sprout->GetAll(['Property'],
572 :     "ORDER BY Property(property-name), Property(property-value)", [],
573 :     ['Property(property-name)', 'Property(property-value)', 'Property(id)']);
574 :     # Combine the property names and values and replace each property ID by a feature count.
575 :     my @retVal;
576 :     for my $propItem (@props) {
577 : parrello 1.19 # Split up the value on punctuation boundaries for readability.
578 :     my $propValue = $propItem->[1];
579 :     $propValue =~ s/::/ :: /g;
580 :     $propValue =~ s/([,;])(\S)/$1 $2/g;
581 :     my $label = $propItem->[0] . " = " . $propValue;
582 : parrello 1.11 my $count = $sprout->GetCount(['Feature', 'HasProperty'], "HasProperty(to-link) = ?",
583 :     [$propItem->[2]]);
584 :     push @retVal, [$label, $count];
585 :     }
586 :     # Return the result.
587 :     return @retVal;
588 :     }
589 :    
590 : parrello 1.1 =head3 GetFeatures
591 :    
592 :     C<< my @features = GetFeatures($sprout, $genomeID); >>
593 :    
594 :     Return the features of the specified genome in the specified Sprout instance.
595 :    
596 :     =over 4
597 :    
598 :     =item sprout
599 :    
600 :     Sprout instance to use to get the features.
601 :    
602 :     =item genomeID
603 :    
604 :     ID of the genome in question.
605 :    
606 :     =item RETURN
607 :    
608 :     Returns a list of 2-tuples, the first element being the feature ID and the second its
609 :     functional assignment (if any).
610 :    
611 :     =back
612 :    
613 :     =cut
614 :    
615 :     sub GetFeatures {
616 :     # Get the parameters.
617 :     my ($sprout, $genomeID) = @_;
618 :     # Get a list of the feature IDs and map them to their functional assignments.
619 :     my @retVal = map { [$_, $sprout->FunctionOf($_)] } $sprout->GetFlat(['HasFeature'],
620 :     "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
621 :     [$genomeID], 'HasFeature(to-link)');
622 :     # Return the result.
623 :     return @retVal;
624 :     }
625 :    
626 :     =head3 ShowLists
627 :    
628 : parrello 1.16 C<< my @htmlLines = ShowLists($cgi, %lists); >>
629 : parrello 1.1
630 : parrello 1.16 Display a set of lists. Each list should consist of 2-tuples, and the list
631 :     entries will be displayed as 2-element table rows with a header row.
632 : parrello 1.1
633 :     =over 4
634 :    
635 : parrello 1.16 =item cgi
636 : parrello 1.2
637 : parrello 1.16 A CGI query object containing the options for this program. It is also used to format
638 :     HTML.
639 : parrello 1.2
640 : parrello 1.1 =item lists
641 :    
642 :     A hash mapping list names to list references.
643 :    
644 : parrello 1.16 =item RETURN
645 :    
646 :     Returns a list of HTML lines displaying the list in tabular form.
647 :    
648 : parrello 1.15 =back
649 :    
650 : parrello 1.1 =cut
651 :    
652 :     sub ShowLists {
653 :     # Get the parameters.
654 : parrello 1.16 my $cgi = shift @_;
655 : parrello 1.1 my %lists = @_;
656 : parrello 1.16 # Declare the return variable. The HTML lines will be accumulated
657 :     # in here and then joined with new-lines.
658 :     my @retVal = ();
659 : parrello 1.1 # Loop through the lists in alphabetical order by list name.
660 : parrello 1.6 for my $listName (sort keys %lists) {
661 : parrello 1.1 # Get the list itself.
662 :     my $list = $lists{$listName};
663 :     # Get the number of list items.
664 :     my $listSize = scalar @{$list};
665 : parrello 1.6 # Only proceed if the list is nonempty.
666 :     if ($listSize > 0) {
667 : parrello 1.16 my $header = ComputeHeader($listName, $listSize);
668 : parrello 1.6 Trace($header) if T(3);
669 : parrello 1.16 # Display the header line as a header.
670 :     push @retVal, ShowTitle($cgi, $header);
671 : parrello 1.17 # Now display the list as table rows. Note we convert underbars to spaces
672 :     # in the name row to make the table easier to fit into narrow places.
673 : parrello 1.16 for my $entry (@{$list}) {
674 :     my ($name, $data) = @{$entry};
675 : parrello 1.18 $name =~ tr/_/ /;
676 : parrello 1.23 push @retVal, ShowDatum($cgi, $name => $data);
677 : parrello 1.1 }
678 :     }
679 :     }
680 : parrello 1.16 # Return the list of HTML lines.
681 :     return @retVal;
682 : parrello 1.1 }
683 :    
684 : parrello 1.16 =head3 ComputeHeader
685 : parrello 1.7
686 : parrello 1.16 C<< my $header = ComputeHeader($name, $count); >>
687 : parrello 1.7
688 :     Return a list header for a list of the specified length.
689 :    
690 :     =over 4
691 :    
692 :     =item name
693 :    
694 :     Name of the list.
695 :    
696 :     =item count
697 :    
698 :     Number of entries in the list.
699 :    
700 :     =item RETURN
701 :    
702 :     Returns a list header that shows the name of the list and the number of entries.
703 :    
704 :     =back
705 :    
706 :     =cut
707 :    
708 : parrello 1.16 sub ComputeHeader {
709 : parrello 1.7 # Get the parameters.
710 :     my ($name, $count) = @_;
711 :     # Declare the return variable.
712 : parrello 1.15 my $retVal;
713 : parrello 1.7 if ($count == 0) {
714 : parrello 1.16 $retVal = "$name: none";
715 : parrello 1.7 } elsif ($count == 1) {
716 : parrello 1.16 $retVal = "$name: one";
717 : parrello 1.7 } else {
718 : parrello 1.16 $retVal = "$name: $count";
719 : parrello 1.7 }
720 :     # Return the result.
721 :     return $retVal;
722 :     }
723 :    
724 : parrello 1.11 =head3 ShowCounts
725 :    
726 :     C<< ShowCounts($sprout); >>
727 :    
728 :     Display general counts for the specified sprout instance. These counts are
729 :     used in progress reports.
730 :    
731 :     =over 4
732 :    
733 : parrello 1.16 =item cgi
734 :    
735 :     CGI query object used to format the output.
736 :    
737 : parrello 1.11 =item sprout
738 :    
739 :     Sprout instance for which counts are to be produced.
740 :    
741 : parrello 1.16 =item RETURN
742 :    
743 :     Returns a list of HTML lines with the counts arranged in table rows.
744 :    
745 : parrello 1.11 =back
746 :    
747 :     =cut
748 :    
749 :     sub ShowCounts {
750 :     # Get the parameters.
751 : parrello 1.16 my ($cgi, $sprout) = @_;
752 : parrello 1.11 # Count genomes and subsystems.
753 :     my $genomes = $sprout->GetCount(['Genome']);
754 :     my $subsystems = $sprout->GetCount(['Subsystem']);
755 : parrello 1.23 # Count roles and external functional assignments.
756 : parrello 1.11 my $roles = $sprout->GetCount(['OccursInSubsystem']);
757 :     my $funcs = $sprout->GetCount(['ExternalAliasFunc']);
758 : parrello 1.16 # Count features.
759 : parrello 1.11 my $features = $sprout->GetCount(['Feature']);
760 :     # Display the counts.
761 : parrello 1.16 my @retVal = ();
762 : parrello 1.23 push @retVal, ShowDatum($cgi, Genomes => $genomes);
763 :     push @retVal, ShowDatum($cgi, Subsystems => $subsystems);
764 :     push @retVal, ShowDatum($cgi, Roles => $roles);
765 :     push @retVal, ShowDatum($cgi, 'External function assignments', $funcs);
766 :     push @retVal, ShowDatum($cgi, Features => $features);
767 : parrello 1.16 # Return the html.
768 :     return @retVal;
769 :     }
770 :    
771 : parrello 1.23 =head3 ShowDatum
772 :    
773 :     C<< my $htmlText = ShowDatum($cgi, $label, $value); >>
774 :    
775 :     Return a table row displaying the specified label and value.
776 :    
777 :     =over 4
778 :    
779 :     =item cgi
780 :    
781 :     CGI query object used to generate the HTML text.
782 :    
783 :     =item label
784 :    
785 :     Label to appear in the left cell of the table row.
786 :    
787 :     =item value
788 :    
789 :     Value to appear in the right cell of the table row.
790 :    
791 :     =item RETURN
792 :    
793 :     Returns the HTML for a single table row with the last cell right-aligned.
794 :    
795 :     =back
796 :    
797 :     =cut
798 :    
799 :     sub ShowDatum {
800 :     # Get the parameters.
801 :     my ($cgi, $label, $value) = @_;
802 :     # Create the table row.
803 :     my $retVal = $cgi->Tr($cgi->td($label), $cgi->td({align => 'right'}, $value));
804 :     # Return it.
805 :     return $retVal;
806 :     }
807 :    
808 : parrello 1.16 =head3 ShowTitle
809 :    
810 :     C<< my $html = ShowTitle($cgi, $title); >>
811 :    
812 :     Display a title line. This will be a merged table row with bolded text.
813 :    
814 :     =over 4
815 :    
816 :     =item cgi
817 :    
818 :     CGI query object used to generate HTML output.
819 :    
820 :     =item RETURN
821 :    
822 :     Returns the HTML text.
823 :    
824 :     =back
825 :    
826 :     =cut
827 :    
828 :     sub ShowTitle {
829 :     # Get the parameters.
830 :     my ($cgi, $title) = @_;
831 :     # Declare the return variable.
832 :     my $retVal = $cgi->Tr($cgi->th({colspan => 2, align => "center"}, $title));
833 :     # Return the result.
834 :     return $retVal;
835 : parrello 1.11 }
836 : parrello 1.16
837 : parrello 1.1 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3