[Bio] / Sprout / NewStuffCheck.pl Repository:
ViewVC logotype

Annotation of /Sprout/NewStuffCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.26 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 New Stuff Checker
4 :    
5 :     This script compares the genomes, features, and annotations in
6 : parrello 1.16 the old and new sprouts and lists the differences and produces
7 :     a report in HTML. The output is an HTML fragment, not an entire
8 :     web page. This is because we expect it to be included in another
9 :     page.
10 : parrello 1.1
11 : parrello 1.11 The currently-supported command-line options for NewStuffCheck are as follows.
12 : parrello 1.1
13 :     =over 4
14 :    
15 :     =item user
16 :    
17 :     Name suffix to be used for log files. If omitted, the PID is used.
18 :    
19 :     =item trace
20 :    
21 :     Numeric trace level. A higher trace level causes more messages to appear. The
22 :     default trace level is 2. Tracing will be directly to the standard output
23 :     as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
24 :     where I<User> is the value of the B<user> option above.
25 :    
26 :     =item sql
27 :    
28 :     If specified, turns on tracing of SQL activity.
29 :    
30 :     =item background
31 :    
32 :     Save the standard and error output to files. The files will be created
33 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
34 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
35 :     B<user> option above.
36 :    
37 :     =item h
38 :    
39 :     Display this command's parameters and options.
40 :    
41 :     =item phone
42 :    
43 :     Phone number to message when the script is complete.
44 :    
45 : parrello 1.11 =item groupFile
46 :    
47 :     Name of the group file (described below). The default is C<groups.tbl>
48 :     in the Sprout data directory.
49 :    
50 : parrello 1.16 =item outFile
51 :    
52 : parrello 1.19 Output file name. The default is C<html/includes/diff.inc> in the
53 : parrello 1.16 nmpdr C<next> directory.
54 :    
55 : parrello 1.23 =item orgFile
56 :    
57 :     Output file for the genome report. The default is C<html/includes/genomes.inc> in
58 :     the nmpdr C<next> directory.
59 :    
60 : parrello 1.11 =back
61 :    
62 :     =head2 The Group File
63 :    
64 :     A key data file for this process is C<groups.tbl>. This file is kept in the
65 :     Sprout Data directory, and contains the following columns:
66 :    
67 :     =over 4
68 :    
69 :     =item name
70 :    
71 :     Name of the group.
72 :    
73 :     =item page
74 :    
75 :     Name of the group's page on the web site (e.g. C<campy.php> for
76 :     Campylobacter)
77 :    
78 :     =item genus
79 :    
80 :     Genus of the group
81 :    
82 :     =item species
83 :    
84 :     Species of the group, or an empty string if the group is for an entire
85 : parrello 1.13 genus. If the group contains more than one species, the species names
86 :     should be separated by commas.
87 : parrello 1.11
88 : parrello 1.1 =back
89 :    
90 :     =cut
91 :    
92 :     use strict;
93 :     use Tracer;
94 :     use Cwd;
95 :     use File::Copy;
96 :     use File::Path;
97 :     use FIG;
98 :     use SFXlate;
99 :     use Sprout;
100 : parrello 1.16 use CGI;
101 : parrello 1.20 use FIGRules;
102 : parrello 1.1
103 :     # Get the command-line options and parameters.
104 :     my ($options, @parameters) = StandardSetup([qw(Sprout) ],
105 :     {
106 : parrello 1.11 groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"],
107 : parrello 1.16 trace => ["2", "tracing level; use a minus to prevent tracing to standard output"],
108 : parrello 1.1 phone => ["", "phone number (international format) to call when load finishes"],
109 : parrello 1.16 outFile => ["$FIG_Config::nmpdr_base/next/html/includes/diff.inc", "output file for the difference report"],
110 : parrello 1.23 orgFile => ["$FIG_Config::nmpdr_base/next/html/includes/genomes.inc", "output file for the genome report"],
111 : parrello 1.1 },
112 :     "",
113 :     @ARGV);
114 :     # Set a variable to contain return type information.
115 :     my $rtype;
116 :     # Insure we catch errors.
117 :     eval {
118 : parrello 1.16 # Get a CGI object for building the output. We pass it the options hash so
119 :     # the formatting subroutines have access to it. Also, we want it to know
120 :     # we're not a real web script.
121 :     my $cgi = CGI->new($options);
122 :     # Start accumulating HTML data.
123 :     my @html = ();
124 :     # Open the output file. We do this early in case there's a problem.
125 :     my $outFileName = $options->{outFile};
126 :     Trace("Opening output file $outFileName.") if T(2);
127 :     Open(\*OUTPUT, ">$outFileName");
128 :     # Get a nice-looking version name and make it into a title.
129 :     my $version = uc $FIG_Config::nmpdr_version;
130 :     $version =~ tr/_/ /;
131 : parrello 1.23 push @html, $cgi->h4({align => "center"}, "Difference Report for $version");
132 : parrello 1.16 # Start the table.
133 :     push @html, $cgi->start_table({align => "center", border => "2"});
134 : parrello 1.11 # Get the group file.
135 :     my $groupFileName = $options->{groupFile};
136 :     Trace("Reading group file $groupFileName.") if T(2);
137 : parrello 1.2 Trace("Processing genomes.") if T(2);
138 : parrello 1.8 # Get the current SEED.
139 :     my $fig = FIG->new();
140 : parrello 1.1 # Get the old Sprout.
141 : parrello 1.22 my $oldSprout = SFXlate->old_sprout_only();
142 : parrello 1.1 # Get its genomes in alphabetical order.
143 : parrello 1.8 my @oldGenomes = GetGenomes($oldSprout);
144 : parrello 1.1 # Get the new Sprout.
145 :     my $newSprout = SFXlate->new_sprout_only();
146 :     # Get its genomes in alphabetical order.
147 : parrello 1.8 my @newGenomes = GetGenomes($newSprout);
148 : parrello 1.7 # Compare the two genomes lists.
149 : parrello 1.1 my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes);
150 : parrello 1.24 # Get the super-group data.
151 :     my %superTable = $newSprout->CheckGroupFile();
152 :     # Create a list for the new genomes that includes BBH and feature counts. We'll flip this
153 :     # lists so that the genome names are first and the IDs second.
154 :     my @insertedGenomeList = ();
155 : parrello 1.7 for my $insertedGenome (@{$insertedGenomes}) {
156 :     my $genomeID = $insertedGenome->[0];
157 : parrello 1.20 # For a new genome, display the feature and BBH counts.
158 : parrello 1.7 my $count = $newSprout->GetCount(['HasFeature'], "HasFeature(from-link) = ?",
159 :     [$genomeID]);
160 : parrello 1.24 my $suffix = ($count == 1 ? "one feature" : "$count features");
161 : parrello 1.20 my $bbhCount = FIGRules::BatchBBHs("fig|$genomeID.%", 1e-10);
162 :     $suffix .= "; " . ($bbhCount == 1 ? "one BBH" : "$bbhCount BBHs");
163 : parrello 1.24 push @insertedGenomeList, [$insertedGenome->[1], "$genomeID ($suffix)"];
164 : parrello 1.7 }
165 : parrello 1.24 # Create a list for the deleted genomes that contains information about SEED status.
166 :     # This list is flipped, too.
167 :     my @deletedGenomeList = ();
168 : parrello 1.8 for my $deletedGenome (@{$deletedGenomes}) {
169 :     my $genomeID = $deletedGenome->[0];
170 : parrello 1.24 my $suffix = "";
171 : parrello 1.8 if ($fig->is_genome($genomeID)) {
172 : parrello 1.24 # Here the deleted genome is still in the SEED.
173 : parrello 1.8 my $complete = ($fig->is_complete($genomeID) ? "complete" : "incomplete");
174 : parrello 1.24 $suffix = " (still in SEED, $complete)";
175 :     } else {
176 :     # It's not in the SEED. See if it has been replaced.
177 :     my ($genus, $species, $strain) = $oldSprout->GetGenomeNameData($genomeID);
178 :     my @genomeIDs = $newSprout->GetGenomeByNameData($genus, $species, $strain);
179 :     if (scalar @genomeIDs) {
180 :     $suffix = " (replaced)";
181 :     }
182 : parrello 1.8 }
183 : parrello 1.24 push @deletedGenomeList, [$deletedGenome->[1], "$genomeID$suffix"];
184 : parrello 1.8 }
185 : parrello 1.1 # Display the lists.
186 : parrello 1.24 push @html, ShowLists($cgi, 'New Genomes' => \@insertedGenomeList,
187 :     'Deleted Genomes' => \@deletedGenomeList);
188 : parrello 1.8 # Now the groups.
189 :     Trace("Comparing groups.") if T(2);
190 :     my %oldGroups = $oldSprout->GetGroups();
191 :     my %newGroups = $newSprout->GetGroups();
192 :     # Loop through the new groups.
193 :     for my $newGroup (sort keys %newGroups) {
194 :     Trace("Processing group $newGroup.") if T(3);
195 :     # Find out if this group is new to this version.
196 :     if (! exists $oldGroups{$newGroup}) {
197 :     # Construct a list of this group's genes.
198 :     my @groupGenomes = NameGenomes($newSprout, $newGroups{$newGroup});
199 : parrello 1.16 push @html, ShowLists($cgi, "Genomes in new group $newGroup" => \@groupGenomes);
200 : parrello 1.8 } else {
201 : parrello 1.21 # Here the group is in both versions. Fix the lists and compare them. Note that we'll be comparing
202 : parrello 1.24 # on the genome ID, which will become the second list element after the call to NameGenomes.
203 : parrello 1.21 my @newGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($newSprout, $newGroups{$newGroup});
204 :     my @oldGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($oldSprout, $oldGroups{$newGroup});
205 : parrello 1.10 Trace("Comparing lists for $newGroup.") if T(4);
206 : parrello 1.21 my ($insertedGroupGenomes, $deletedGroupGenomes) = Tracer::CompareLists(\@newGroupList, \@oldGroupList, 1);
207 : parrello 1.10 Trace("Comparison complete.") if T(4);
208 : parrello 1.16 # Delete the old group data. When we're done, this means the hash
209 :     # will contain only the deleted groups.
210 : parrello 1.8 delete $oldGroups{$newGroup};
211 :     # Show the lists. Empty lists will not be shown.
212 : parrello 1.10 Trace("Displaying group lists.") if T(4);
213 : parrello 1.16 push @html, ShowLists($cgi, "Genomes new to $newGroup" => $insertedGroupGenomes,
214 :     "Genomes no longer in $newGroup" => $deletedGroupGenomes);
215 : parrello 1.8 }
216 :     }
217 : parrello 1.10 Trace("Processing deleted groups.") if T(4);
218 : parrello 1.8 # Now list the deleted groups.
219 :     for my $oldGroup (sort keys %oldGroups) {
220 :     Trace("Processing deleted group $oldGroup.") if T(3);
221 :     my @groupGenomes = NameGenomes($oldSprout, $oldGroups{$oldGroup});
222 : parrello 1.16 push @html, ShowLists($cgi, "Genomes in deleted group $oldGroup" => \@groupGenomes);
223 : parrello 1.8 }
224 : parrello 1.1 # Next, we get the subsystems.
225 : parrello 1.2 Trace("Processing subsystems.") if T(2);
226 : parrello 1.1 my @oldSubsystems = GetSubsystems($oldSprout);
227 :     my @newSubsystems = GetSubsystems($newSprout);
228 :     # Compare and display the subsystem lists.
229 :     my ($insertedSubs, $deletedSubs) = Tracer::CompareLists(\@newSubsystems, \@oldSubsystems);
230 : parrello 1.8 # Check the deleted subsystems to see if they're in SEED.
231 :     if (scalar @{$deletedSubs} > 0) {
232 :     my %subChecker = map { $_ => 1 } $fig->all_subsystems();
233 :     for my $deletedSub (@{$deletedSubs}) {
234 :     my $subID = $deletedSub->[0];
235 :     if ($subChecker{$subID}) {
236 :     my $trusted = ($fig->usable_subsystem($subID) ? "usable" : "not usable");
237 : parrello 1.9 $deletedSub->[1] .= " (still in SEED, $trusted)";
238 : parrello 1.8 }
239 :     }
240 :     }
241 : parrello 1.16 push @html, ShowLists($cgi, 'New Subsystems' => $insertedSubs,
242 :     'Deleted Subsystems' => $deletedSubs);
243 : parrello 1.23 # Print what we've done so far.
244 :     FlushData(\*OUTPUT, \@html);
245 :     # Now we need to process some statistics that require looping through all the
246 :     # features in the new sprout. While we're at it, we'll collect the BBH and
247 :     # coupling counts.
248 :     my $bbhCount = 0;
249 :     my $couplingCount = 0;
250 : parrello 1.24 # We'll accumulate a report of genomes with missing BBHs in here.
251 :     my @bbhMissingGenomes = ();
252 : parrello 1.23 # One of the reports is only for genomes common to both sprouts. To help us
253 :     # make this determination, we get a hash of the inserted genomes.
254 :     my %skipGenomes = map { $_->[0] => 1 } @{$insertedGenomes};
255 :     # Open the organism report file.
256 :     Open(\*ORGOUT, ">$options->{orgFile}");
257 :     # Start the table.
258 :     my @orgHtml = ();
259 : parrello 1.26 push @orgHtml, $cgi->h4({ align => 'center' }, "Genome Report for $version");
260 : parrello 1.23 push @orgHtml, $cgi->start_table({ border => 2, align => 'center'});
261 :     push @orgHtml, $cgi->Tr($cgi->th("Genome"),
262 :     $cgi->th({align => 'right'}, ["Size (bp)", "Feats", "Contigs", "Subs",
263 :     "F In SS", "PEGs", "RNAs", "PPs", "New", "Del"]));
264 :     FlushData(\*ORGOUT, \@orgHtml);
265 : parrello 1.24 # Now we start the loop. Note that "newGenomes" means all the genomes in the new Sprout,
266 :     # not the list of genomes that are new!
267 : parrello 1.23 for my $genomeData (@newGenomes) {
268 :     # Get this genome's ID and name.
269 :     my $genomeID = $genomeData->[0];
270 :     # Create a title for it.
271 :     my $genomeTitle = "$genomeID: $genomeData->[1]";
272 :     # Compute its size.
273 :     my $genomeSize = $newSprout->GenomeLength($genomeID);
274 : parrello 1.24 # Get a list of the genomes in the new Sprout that are not this one.
275 : parrello 1.23 my @otherGenomes = grep { $_ ne $genomeID } map { $_->[0] } @newGenomes;
276 :     Trace("Computing BBH matrix for $genomeID.") if T(3);
277 : parrello 1.24 # Get the bbh matrix going from the current genome to the others.
278 : parrello 1.23 my %matrix = $newSprout->BBHMatrix($genomeID, 1e-20, @otherGenomes);
279 :     # Set up the subsystem hash. This will be used to track which subsystems are used by
280 :     # the genome's features.
281 :     my %subHash = ();
282 :     # Set up the contig hash. This will be used to track which contigs are used by the
283 :     # genome's features.
284 :     my %contigHash = ();
285 :     # Set up a counter hash for feature types.
286 :     my %counters = (peg => 0, in_ss => 0, rna => 0, pp => 0, total => 0);
287 :     # We'll store information about the genome's features (ID and functional role) in here.
288 :     my @newFeatures = ();
289 : parrello 1.24 # Finally, we'll use this flag to warn us if there are no BBHs.
290 :     my $bbhsFound = 0;
291 : parrello 1.23 # Loop through the genome's features. The order is important here, because we need
292 :     # to match the order used by "GetFeatures" for the feature difference comparison.
293 :     Trace("Processing feature statistics for $genomeID.") if T(3);
294 :     my $fquery = $newSprout->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
295 :     [$genomeID]);
296 :     # Loop through the features, updating the counts.
297 :     while (my $feature = $fquery->Fetch()) {
298 :     # Update the total feature count.
299 :     $counters{total}++;
300 :     Trace("$counters{total} features processed for $genomeID.") if T(3) && ($counters{total} % 500 == 0);
301 :     # Get the feature ID and role.
302 :     my $fid = $feature->PrimaryValue('Feature(id)');
303 :     push @newFeatures, [$fid, $feature->PrimaryValue('Feature(assignment)')];
304 :     # Check to see if we have BBH data.
305 :     if (exists $matrix{$fid}) {
306 : parrello 1.24 my $fidBbhCount = scalar keys %{$matrix{$fid}};
307 :     if ($fidBbhCount > 0) {
308 :     # Denote that this feature has BBHs.
309 :     $bbhsFound = 1;
310 :     # Add them to the total BBH count.
311 :     $bbhCount += $fidBbhCount;
312 :     }
313 : parrello 1.23 }
314 :     # Ask for couplings.
315 :     my %coupleHash = $newSprout->CoupledFeatures($fid);
316 :     $couplingCount += keys %coupleHash;
317 :     # See if this feature is in a subsystem.
318 :     my %subs = $newSprout->SubsystemsOf($fid);
319 :     if (keys %subs) {
320 :     $counters{in_ss}++;
321 :     for my $sub (keys %subs) {
322 :     $subHash{$sub} = 1;
323 :     }
324 :     }
325 :     # Increment the feature type counter.
326 :     $counters{$feature->PrimaryValue('Feature(feature-type)')}++;
327 :     # Insure we've tracked this feature's contigs.
328 :     my @locations = split /\s*,\s*/, $feature->PrimaryValue('Feature(location-string)');
329 :     for my $loc (@locations) {
330 :     my $locObject = BasicLocation->new($loc);
331 :     $contigHash{$locObject->Contig} = 1;
332 :     }
333 :     }
334 :     Trace("Feature data compiled for $genomeID.") if T(3);
335 :     # The last thing we need to do is compute the number of features added or deleted.
336 :     # This goes in the genome report, but it's only meaningful for common genomes.
337 :     my ($addCount, $delCount) = ("","");
338 :     if (! $skipGenomes{$genomeID}) {
339 :     # Get the old features.
340 :     my @oldFeatures = GetFeatures($oldSprout, $genomeID);
341 :     Trace("Comparing features for $genomeID.") if T(3);
342 :     # Compare the lists.
343 :     my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);
344 :     $addCount = scalar(@{$insertedFeatures});
345 :     $delCount = scalar(@{$deletedFeatures});
346 :     }
347 : parrello 1.24 # Check to see if this genome is missing its BBHs.
348 :     if (! $bbhsFound) {
349 :     # It is, so add a line for it to the missing-BBH list.
350 :     push @bbhMissingGenomes, ShowDatum($cgi, $genomeData->[1], $genomeID);
351 :     }
352 : parrello 1.23 push @orgHtml, $cgi->Tr($cgi->td($genomeTitle),
353 :     $cgi->td({align => 'right'}, [$genomeSize, $counters{total}, scalar(keys %contigHash),
354 :     scalar(keys %subHash), $counters{in_ss}, $counters{peg},
355 :     $counters{rna}, $counters{pp}, $addCount, $delCount]));
356 :     FlushData(\*ORGOUT, \@orgHtml);
357 :     }
358 :     # Close the table for the genome report.
359 :     push @orgHtml, $cgi->end_table();
360 :     FlushData(\*ORGOUT, \@orgHtml);
361 :     close ORGOUT;
362 : parrello 1.24 # Check for a missing-BBH report.
363 :     if (scalar @bbhMissingGenomes) {
364 :     # There is a report, so put it into the output stream.
365 :     push @html, ShowTitle($cgi, "Genomes without BBHs");
366 :     push @html, @bbhMissingGenomes;
367 :     }
368 :     # Flush the genome feature comparison data and the missing-BBH report (if any).
369 : parrello 1.23 FlushData(\*OUTPUT, \@html);
370 : parrello 1.11 # Next, we show some basic counts.
371 : parrello 1.23 Trace("Displaying counts.") if T(3);
372 : parrello 1.16 push @html, ShowTitle($cgi, "Statistics for old Sprout");
373 :     push @html, ShowCounts($cgi, $oldSprout);
374 :     push @html, ShowTitle($cgi, "Statistics for new Sprout");
375 :     push @html, ShowCounts($cgi, $newSprout);
376 : parrello 1.23 push @html, ShowDatum($cgi, BBHs => $bbhCount);
377 :     push @html, ShowDatum($cgi, "Functional Couplings", $couplingCount);
378 :     FlushData(\*OUTPUT, \@html);
379 : parrello 1.11 # Now we show the genomes that are not in groups but could be. First, we convert
380 :     # our group hash from the new Sprout into the form used on the web site.
381 :     Trace("Examining possible missing genomes in groups.") if T(2);
382 : parrello 1.24 my %fixedGroups = $newSprout->Fix(%newGroups);
383 :     for my $group (sort keys %superTable) {
384 : parrello 1.11 Trace("Checking group $group.");
385 : parrello 1.25 # Loop through this group's genus/species pairs creating filters
386 :     # for a genome query.
387 :     my @filters = ();
388 :     my @filterParms = ();
389 :     for my $genusSpecies (@{$superTable{$group}->{content}}) {
390 :     my ($genus, $species) = @{$genusSpecies};
391 :     # Filter on genus.
392 :     my $filter = 'Genome(genus) = ?';
393 :     push @filterParms, $genus;
394 :     # If necessary, filter on species.
395 :     if ($species) {
396 :     $filter .= ' AND Genome(species) = ?';
397 :     push @filterParms, $species;
398 :     }
399 :     # Add this filter to the list.
400 :     push @filters, "($filter)";
401 :     }
402 :     # Get all the genomes that should be in the super-group.
403 :     my @possibles = $newSprout->GetFlat(['Genome'], join(" OR ", @filters),
404 :     \@filterParms, 'Genome(id)');
405 : parrello 1.11 # Get a hash of the genomes already in it.
406 :     my %inGroup = map { $_ => 1 } @{$fixedGroups{$group}};
407 :     # Get the possibles that aren't in the group and add identifying information.
408 : parrello 1.12 my @leftOut = NameGenomes($newSprout, [ grep { ! exists $inGroup{$_} } @possibles ]);
409 : parrello 1.11 # If anything survived, show the list.
410 :     if (@leftOut) {
411 : parrello 1.16 push @html, ShowLists($cgi, "Candidates for $group" => \@leftOut);
412 : parrello 1.11 }
413 :     }
414 : parrello 1.23 FlushData(\*OUTPUT, \@html);
415 : parrello 1.16 # Close the table.
416 :     push @html, $cgi->end_table();
417 : parrello 1.23 # Flush the last of the HTML.
418 :     FlushData(\*OUTPUT, \@html);
419 : parrello 1.16 # Close the output file.
420 :     close OUTPUT;
421 :     Trace("Analysis complete.") if T(2);
422 : parrello 1.1 };
423 :     if ($@) {
424 :     Trace("Script failed with error: $@") if T(0);
425 :     $rtype = "error";
426 :     } else {
427 :     Trace("Script complete.") if T(2);
428 :     $rtype = "no error";
429 :     }
430 :     if ($options->{phone}) {
431 : parrello 1.11 my $msgID = Tracer::SendSMS($options->{phone}, "New Stuff Checker terminated with $rtype.");
432 : parrello 1.1 if ($msgID) {
433 :     Trace("Phone message sent with ID $msgID.") if T(2);
434 :     } else {
435 :     Trace("Phone message not sent.") if T(2);
436 :     }
437 :     }
438 :    
439 : parrello 1.23 =head3 FlushData
440 :    
441 : parrello 1.24 FlushData($handle, \@lines);
442 : parrello 1.23
443 :     Write the specified lines to the output file and clear them out of the list. This
444 :     method is called periodically so that even if something goes wrong we can still
445 : parrello 1.24 see the data accumulating in the output file. The key aspect here is that we
446 :     put new-line characters after each line written and show something in the trace
447 :     log.
448 : parrello 1.23
449 :     =over 4
450 :    
451 :     =item handle
452 :    
453 :     Output handle to which the lines should be written.
454 :    
455 :     =item lines
456 :    
457 :     Reference to a list of output lines. The output lines will be written to the output
458 :     handle and then removed from the list.
459 :    
460 :     =back
461 :    
462 :     =cut
463 :    
464 :     sub FlushData {
465 :     # Get the parameters.
466 :     my ($handle, $lines) = @_;
467 :     Trace("Flushing " . scalar(@{$lines}) . " lines to output file.") if T(3);
468 :     # Write the lines.
469 :     print $handle join("\n", @{$lines});
470 :     # Write a terminating new-line.
471 :     print $handle "\n";
472 :     # Clear the list.
473 :     splice @{$lines};
474 :     }
475 :    
476 : parrello 1.8 =head3 GetGenomes
477 : parrello 1.1
478 : parrello 1.24 my @geneList = GetGenomes($sprout);
479 : parrello 1.1
480 :     Return a list of the genomes in the specified Sprout instance. The genomes
481 :     are returned in alphabetical order by genome ID.
482 :    
483 :     =over 4
484 :    
485 :     =item sprout
486 :    
487 :     Sprout instance whose gene list is desired.
488 :    
489 :     =item RETURN
490 :    
491 :     Returns a list of two-tuples. The first element in each tuple is the genome ID,
492 :     and the second is the genome name (genus, species, strain).
493 :    
494 :     =back
495 :    
496 :     =cut
497 :    
498 : parrello 1.8 sub GetGenomes {
499 : parrello 1.1 # Get the parameters.
500 :     my ($sprout) = @_;
501 :     # Get the desired data.
502 :     my @genomes = $sprout->GetAll(['Genome'], "ORDER BY Genome(id)", [], ['Genome(id)',
503 :     'Genome(genus)',
504 :     'Genome(species)',
505 :     'Genome(unique-characterization)']);
506 :     # Create the genome names from the three pieces of the name.
507 :     my @retVal = map { [$_->[0], join(" ", @{$_}[1..3])] } @genomes;
508 :     # Return the result.
509 :     return @retVal;
510 :     }
511 :    
512 : parrello 1.8 =head3 NameGenomes
513 :    
514 : parrello 1.24 my @newList = NameGenomes($sprout, \@genomes);
515 : parrello 1.8
516 :     Convert a list of genome IDs to a list of genome IDs with names.
517 :    
518 :     =over 4
519 :    
520 :     =item sprout
521 :    
522 :     The relevant sprout instance.
523 :    
524 :     =item genomes
525 :    
526 :     Reference to a list of genome IDs
527 :    
528 :     =item RETURN
529 :    
530 :     Returns a list of 2-tuples, each tuple consisting of a genome ID followed by a
531 :     genome name.
532 :    
533 :     =back
534 :    
535 :     =cut
536 :    
537 :     sub NameGenomes {
538 :     # Get the parameters.
539 :     my ($sprout, $genomes) = @_;
540 :     # Attach the names.
541 : parrello 1.16 my @retVal = map { [$sprout->GenusSpecies($_), $_ ] } @{$genomes};
542 : parrello 1.8 # Return the result.
543 :     return @retVal;
544 :     }
545 :    
546 : parrello 1.1 =head3 GetSubsystems
547 :    
548 : parrello 1.24 my @subsystems = GetSubsystems($sprout);
549 : parrello 1.1
550 :     Get a list of the subsystems in the specified Sprout instance.
551 :    
552 :     =over 4
553 :    
554 :     =item sprout
555 :    
556 :     Sprout instance whose subsystems are desired.
557 :    
558 :     =item RETURN
559 :    
560 :     Returns a list of 2-tuples, each consisting of the subsystem name followed by
561 :     the name of the curator.
562 :    
563 :     =back
564 :    
565 :     =cut
566 :    
567 :     sub GetSubsystems {
568 :     # Get the parameters.
569 :     my ($sprout) = @_;
570 :     # Declare the return variable.
571 :     my @retVal = $sprout->GetAll(['Subsystem'], "ORDER BY Subsystem(id)",
572 :     [], ['Subsystem(id)', 'Subsystem(curator)']);
573 :     # Return the result.
574 :     return @retVal;
575 :     }
576 :    
577 : parrello 1.11 =head3 GetProperties
578 :    
579 : parrello 1.24 my @propertyList = GetProperties($sprout);
580 : parrello 1.11
581 :     Return a list of properties. Each element in the list will be a 2-tuple containing
582 :     the property name and value in the first column and its ID in the second column.
583 :    
584 :     =over 4
585 :    
586 :     =item sprout
587 :    
588 :     Sprout instance to be used to retrieve the properties.
589 :    
590 :     =item RETURN
591 :    
592 :     Returns a list of 2-tuples. The first element in each 2-tuple will be a string
593 :     in the form of an assignment of the property value to the property name. The second
594 :     element will be the number of features possessing the property. The list will be
595 :     sorted in ascending alphabetical order.
596 :    
597 :     =back
598 :    
599 :     =cut
600 :    
601 :     sub GetProperties {
602 :     # Get the parameters.
603 :     my ($sprout) = @_;
604 :     # Get the properties.
605 :     my @props = $sprout->GetAll(['Property'],
606 :     "ORDER BY Property(property-name), Property(property-value)", [],
607 :     ['Property(property-name)', 'Property(property-value)', 'Property(id)']);
608 :     # Combine the property names and values and replace each property ID by a feature count.
609 :     my @retVal;
610 :     for my $propItem (@props) {
611 : parrello 1.19 # Split up the value on punctuation boundaries for readability.
612 :     my $propValue = $propItem->[1];
613 :     $propValue =~ s/::/ :: /g;
614 :     $propValue =~ s/([,;])(\S)/$1 $2/g;
615 :     my $label = $propItem->[0] . " = " . $propValue;
616 : parrello 1.11 my $count = $sprout->GetCount(['Feature', 'HasProperty'], "HasProperty(to-link) = ?",
617 :     [$propItem->[2]]);
618 :     push @retVal, [$label, $count];
619 :     }
620 :     # Return the result.
621 :     return @retVal;
622 :     }
623 :    
624 : parrello 1.1 =head3 GetFeatures
625 :    
626 : parrello 1.24 my @features = GetFeatures($sprout, $genomeID);
627 : parrello 1.1
628 :     Return the features of the specified genome in the specified Sprout instance.
629 :    
630 :     =over 4
631 :    
632 :     =item sprout
633 :    
634 :     Sprout instance to use to get the features.
635 :    
636 :     =item genomeID
637 :    
638 :     ID of the genome in question.
639 :    
640 :     =item RETURN
641 :    
642 :     Returns a list of 2-tuples, the first element being the feature ID and the second its
643 :     functional assignment (if any).
644 :    
645 :     =back
646 :    
647 :     =cut
648 :    
649 :     sub GetFeatures {
650 :     # Get the parameters.
651 :     my ($sprout, $genomeID) = @_;
652 :     # Get a list of the feature IDs and map them to their functional assignments.
653 :     my @retVal = map { [$_, $sprout->FunctionOf($_)] } $sprout->GetFlat(['HasFeature'],
654 :     "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
655 :     [$genomeID], 'HasFeature(to-link)');
656 :     # Return the result.
657 :     return @retVal;
658 :     }
659 :    
660 :     =head3 ShowLists
661 :    
662 : parrello 1.24 my @htmlLines = ShowLists($cgi, %lists);
663 : parrello 1.1
664 : parrello 1.16 Display a set of lists. Each list should consist of 2-tuples, and the list
665 :     entries will be displayed as 2-element table rows with a header row.
666 : parrello 1.1
667 :     =over 4
668 :    
669 : parrello 1.16 =item cgi
670 : parrello 1.2
671 : parrello 1.16 A CGI query object containing the options for this program. It is also used to format
672 : parrello 1.24 HTML.
673 : parrello 1.2
674 : parrello 1.1 =item lists
675 :    
676 :     A hash mapping list names to list references.
677 :    
678 : parrello 1.16 =item RETURN
679 :    
680 :     Returns a list of HTML lines displaying the list in tabular form.
681 :    
682 : parrello 1.15 =back
683 :    
684 : parrello 1.1 =cut
685 :    
686 :     sub ShowLists {
687 :     # Get the parameters.
688 : parrello 1.16 my $cgi = shift @_;
689 : parrello 1.1 my %lists = @_;
690 : parrello 1.16 # Declare the return variable. The HTML lines will be accumulated
691 :     # in here and then joined with new-lines.
692 :     my @retVal = ();
693 : parrello 1.1 # Loop through the lists in alphabetical order by list name.
694 : parrello 1.6 for my $listName (sort keys %lists) {
695 : parrello 1.1 # Get the list itself.
696 :     my $list = $lists{$listName};
697 :     # Get the number of list items.
698 :     my $listSize = scalar @{$list};
699 : parrello 1.6 # Only proceed if the list is nonempty.
700 :     if ($listSize > 0) {
701 : parrello 1.16 my $header = ComputeHeader($listName, $listSize);
702 : parrello 1.6 Trace($header) if T(3);
703 : parrello 1.16 # Display the header line as a header.
704 :     push @retVal, ShowTitle($cgi, $header);
705 : parrello 1.17 # Now display the list as table rows. Note we convert underbars to spaces
706 :     # in the name row to make the table easier to fit into narrow places.
707 : parrello 1.16 for my $entry (@{$list}) {
708 :     my ($name, $data) = @{$entry};
709 : parrello 1.18 $name =~ tr/_/ /;
710 : parrello 1.23 push @retVal, ShowDatum($cgi, $name => $data);
711 : parrello 1.1 }
712 :     }
713 :     }
714 : parrello 1.16 # Return the list of HTML lines.
715 :     return @retVal;
716 : parrello 1.1 }
717 :    
718 : parrello 1.16 =head3 ComputeHeader
719 : parrello 1.7
720 : parrello 1.24 my $header = ComputeHeader($name, $count);
721 : parrello 1.7
722 :     Return a list header for a list of the specified length.
723 :    
724 :     =over 4
725 :    
726 :     =item name
727 :    
728 :     Name of the list.
729 :    
730 :     =item count
731 :    
732 :     Number of entries in the list.
733 :    
734 :     =item RETURN
735 :    
736 :     Returns a list header that shows the name of the list and the number of entries.
737 :    
738 :     =back
739 :    
740 :     =cut
741 :    
742 : parrello 1.16 sub ComputeHeader {
743 : parrello 1.7 # Get the parameters.
744 :     my ($name, $count) = @_;
745 :     # Declare the return variable.
746 : parrello 1.15 my $retVal;
747 : parrello 1.7 if ($count == 0) {
748 : parrello 1.16 $retVal = "$name: none";
749 : parrello 1.7 } elsif ($count == 1) {
750 : parrello 1.16 $retVal = "$name: one";
751 : parrello 1.7 } else {
752 : parrello 1.16 $retVal = "$name: $count";
753 : parrello 1.7 }
754 :     # Return the result.
755 :     return $retVal;
756 :     }
757 :    
758 : parrello 1.11 =head3 ShowCounts
759 :    
760 : parrello 1.24 ShowCounts($sprout);
761 : parrello 1.11
762 :     Display general counts for the specified sprout instance. These counts are
763 :     used in progress reports.
764 :    
765 :     =over 4
766 :    
767 : parrello 1.16 =item cgi
768 :    
769 :     CGI query object used to format the output.
770 :    
771 : parrello 1.11 =item sprout
772 :    
773 :     Sprout instance for which counts are to be produced.
774 :    
775 : parrello 1.16 =item RETURN
776 :    
777 :     Returns a list of HTML lines with the counts arranged in table rows.
778 :    
779 : parrello 1.11 =back
780 :    
781 :     =cut
782 :    
783 :     sub ShowCounts {
784 :     # Get the parameters.
785 : parrello 1.16 my ($cgi, $sprout) = @_;
786 : parrello 1.11 # Count genomes and subsystems.
787 :     my $genomes = $sprout->GetCount(['Genome']);
788 :     my $subsystems = $sprout->GetCount(['Subsystem']);
789 : parrello 1.23 # Count roles and external functional assignments.
790 : parrello 1.11 my $roles = $sprout->GetCount(['OccursInSubsystem']);
791 :     my $funcs = $sprout->GetCount(['ExternalAliasFunc']);
792 : parrello 1.16 # Count features.
793 : parrello 1.11 my $features = $sprout->GetCount(['Feature']);
794 :     # Display the counts.
795 : parrello 1.16 my @retVal = ();
796 : parrello 1.23 push @retVal, ShowDatum($cgi, Genomes => $genomes);
797 :     push @retVal, ShowDatum($cgi, Subsystems => $subsystems);
798 :     push @retVal, ShowDatum($cgi, Roles => $roles);
799 :     push @retVal, ShowDatum($cgi, 'External function assignments', $funcs);
800 :     push @retVal, ShowDatum($cgi, Features => $features);
801 : parrello 1.16 # Return the html.
802 :     return @retVal;
803 :     }
804 :    
805 : parrello 1.23 =head3 ShowDatum
806 :    
807 : parrello 1.24 my $htmlText = ShowDatum($cgi, $label, $value);
808 : parrello 1.23
809 :     Return a table row displaying the specified label and value.
810 :    
811 :     =over 4
812 :    
813 :     =item cgi
814 :    
815 :     CGI query object used to generate the HTML text.
816 :    
817 :     =item label
818 :    
819 :     Label to appear in the left cell of the table row.
820 :    
821 :     =item value
822 :    
823 :     Value to appear in the right cell of the table row.
824 :    
825 :     =item RETURN
826 :    
827 :     Returns the HTML for a single table row with the last cell right-aligned.
828 :    
829 :     =back
830 :    
831 :     =cut
832 :    
833 :     sub ShowDatum {
834 :     # Get the parameters.
835 :     my ($cgi, $label, $value) = @_;
836 :     # Create the table row.
837 :     my $retVal = $cgi->Tr($cgi->td($label), $cgi->td({align => 'right'}, $value));
838 :     # Return it.
839 :     return $retVal;
840 :     }
841 :    
842 : parrello 1.16 =head3 ShowTitle
843 :    
844 : parrello 1.24 my $html = ShowTitle($cgi, $title);
845 : parrello 1.16
846 :     Display a title line. This will be a merged table row with bolded text.
847 :    
848 :     =over 4
849 :    
850 :     =item cgi
851 :    
852 :     CGI query object used to generate HTML output.
853 :    
854 :     =item RETURN
855 :    
856 :     Returns the HTML text.
857 :    
858 :     =back
859 :    
860 :     =cut
861 :    
862 :     sub ShowTitle {
863 :     # Get the parameters.
864 :     my ($cgi, $title) = @_;
865 :     # Declare the return variable.
866 :     my $retVal = $cgi->Tr($cgi->th({colspan => 2, align => "center"}, $title));
867 :     # Return the result.
868 :     return $retVal;
869 : parrello 1.11 }
870 : parrello 1.16
871 : parrello 1.25 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3