[Bio] / Sprout / NewStuffCheck.pl Repository:
ViewVC logotype

Annotation of /Sprout/NewStuffCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.24 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 New Stuff Checker
4 :    
5 :     This script compares the genomes, features, and annotations in
6 : parrello 1.16 the old and new sprouts and lists the differences and produces
7 :     a report in HTML. The output is an HTML fragment, not an entire
8 :     web page. This is because we expect it to be included in another
9 :     page.
10 : parrello 1.1
11 : parrello 1.11 The currently-supported command-line options for NewStuffCheck are as follows.
12 : parrello 1.1
13 :     =over 4
14 :    
15 :     =item user
16 :    
17 :     Name suffix to be used for log files. If omitted, the PID is used.
18 :    
19 :     =item trace
20 :    
21 :     Numeric trace level. A higher trace level causes more messages to appear. The
22 :     default trace level is 2. Tracing will be directly to the standard output
23 :     as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
24 :     where I<User> is the value of the B<user> option above.
25 :    
26 :     =item sql
27 :    
28 :     If specified, turns on tracing of SQL activity.
29 :    
30 :     =item background
31 :    
32 :     Save the standard and error output to files. The files will be created
33 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
34 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
35 :     B<user> option above.
36 :    
37 :     =item h
38 :    
39 :     Display this command's parameters and options.
40 :    
41 :     =item phone
42 :    
43 :     Phone number to message when the script is complete.
44 :    
45 : parrello 1.11 =item groupFile
46 :    
47 :     Name of the group file (described below). The default is C<groups.tbl>
48 :     in the Sprout data directory.
49 :    
50 : parrello 1.16 =item outFile
51 :    
52 : parrello 1.19 Output file name. The default is C<html/includes/diff.inc> in the
53 : parrello 1.16 nmpdr C<next> directory.
54 :    
55 : parrello 1.23 =item orgFile
56 :    
57 :     Output file for the genome report. The default is C<html/includes/genomes.inc> in
58 :     the nmpdr C<next> directory.
59 :    
60 : parrello 1.11 =back
61 :    
62 :     =head2 The Group File
63 :    
64 :     A key data file for this process is C<groups.tbl>. This file is kept in the
65 :     Sprout Data directory, and contains the following columns:
66 :    
67 :     =over 4
68 :    
69 :     =item name
70 :    
71 :     Name of the group.
72 :    
73 :     =item page
74 :    
75 :     Name of the group's page on the web site (e.g. C<campy.php> for
76 :     Campylobacter)
77 :    
78 :     =item genus
79 :    
80 :     Genus of the group
81 :    
82 :     =item species
83 :    
84 :     Species of the group, or an empty string if the group is for an entire
85 : parrello 1.13 genus. If the group contains more than one species, the species names
86 :     should be separated by commas.
87 : parrello 1.11
88 : parrello 1.1 =back
89 :    
90 :     =cut
91 :    
92 :     use strict;
93 :     use Tracer;
94 :     use DocUtils;
95 :     use TestUtils;
96 :     use Cwd;
97 :     use File::Copy;
98 :     use File::Path;
99 :     use FIG;
100 :     use SFXlate;
101 :     use Sprout;
102 : parrello 1.16 use CGI;
103 : parrello 1.20 use FIGRules;
104 : parrello 1.1
105 :     # Get the command-line options and parameters.
106 :     my ($options, @parameters) = StandardSetup([qw(Sprout) ],
107 :     {
108 : parrello 1.11 groupFile => ["$FIG_Config::sproutData/groups.tbl", "location of the NMPDR group description file"],
109 : parrello 1.16 trace => ["2", "tracing level; use a minus to prevent tracing to standard output"],
110 : parrello 1.1 phone => ["", "phone number (international format) to call when load finishes"],
111 : parrello 1.16 outFile => ["$FIG_Config::nmpdr_base/next/html/includes/diff.inc", "output file for the difference report"],
112 : parrello 1.23 orgFile => ["$FIG_Config::nmpdr_base/next/html/includes/genomes.inc", "output file for the genome report"],
113 : parrello 1.1 },
114 :     "",
115 :     @ARGV);
116 :     # Set a variable to contain return type information.
117 :     my $rtype;
118 :     # Insure we catch errors.
119 :     eval {
120 : parrello 1.16 # Get a CGI object for building the output. We pass it the options hash so
121 :     # the formatting subroutines have access to it. Also, we want it to know
122 :     # we're not a real web script.
123 :     my $cgi = CGI->new($options);
124 :     # Start accumulating HTML data.
125 :     my @html = ();
126 :     # Open the output file. We do this early in case there's a problem.
127 :     my $outFileName = $options->{outFile};
128 :     Trace("Opening output file $outFileName.") if T(2);
129 :     Open(\*OUTPUT, ">$outFileName");
130 :     # Get a nice-looking version name and make it into a title.
131 :     my $version = uc $FIG_Config::nmpdr_version;
132 :     $version =~ tr/_/ /;
133 : parrello 1.23 push @html, $cgi->h4({align => "center"}, "Difference Report for $version");
134 : parrello 1.16 # Start the table.
135 :     push @html, $cgi->start_table({align => "center", border => "2"});
136 : parrello 1.11 # Get the group file.
137 :     my $groupFileName = $options->{groupFile};
138 :     Trace("Reading group file $groupFileName.") if T(2);
139 : parrello 1.2 Trace("Processing genomes.") if T(2);
140 : parrello 1.8 # Get the current SEED.
141 :     my $fig = FIG->new();
142 : parrello 1.1 # Get the old Sprout.
143 : parrello 1.22 my $oldSprout = SFXlate->old_sprout_only();
144 : parrello 1.1 # Get its genomes in alphabetical order.
145 : parrello 1.8 my @oldGenomes = GetGenomes($oldSprout);
146 : parrello 1.1 # Get the new Sprout.
147 :     my $newSprout = SFXlate->new_sprout_only();
148 :     # Get its genomes in alphabetical order.
149 : parrello 1.8 my @newGenomes = GetGenomes($newSprout);
150 : parrello 1.7 # Compare the two genomes lists.
151 : parrello 1.1 my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes);
152 : parrello 1.24 # Get the super-group data.
153 :     my %superTable = $newSprout->CheckGroupFile();
154 :     # Create a list for the new genomes that includes BBH and feature counts. We'll flip this
155 :     # lists so that the genome names are first and the IDs second.
156 :     my @insertedGenomeList = ();
157 : parrello 1.7 for my $insertedGenome (@{$insertedGenomes}) {
158 :     my $genomeID = $insertedGenome->[0];
159 : parrello 1.20 # For a new genome, display the feature and BBH counts.
160 : parrello 1.7 my $count = $newSprout->GetCount(['HasFeature'], "HasFeature(from-link) = ?",
161 :     [$genomeID]);
162 : parrello 1.24 my $suffix = ($count == 1 ? "one feature" : "$count features");
163 : parrello 1.20 my $bbhCount = FIGRules::BatchBBHs("fig|$genomeID.%", 1e-10);
164 :     $suffix .= "; " . ($bbhCount == 1 ? "one BBH" : "$bbhCount BBHs");
165 : parrello 1.24 push @insertedGenomeList, [$insertedGenome->[1], "$genomeID ($suffix)"];
166 : parrello 1.7 }
167 : parrello 1.24 # Create a list for the deleted genomes that contains information about SEED status.
168 :     # This list is flipped, too.
169 :     my @deletedGenomeList = ();
170 : parrello 1.8 for my $deletedGenome (@{$deletedGenomes}) {
171 :     my $genomeID = $deletedGenome->[0];
172 : parrello 1.24 my $suffix = "";
173 : parrello 1.8 if ($fig->is_genome($genomeID)) {
174 : parrello 1.24 # Here the deleted genome is still in the SEED.
175 : parrello 1.8 my $complete = ($fig->is_complete($genomeID) ? "complete" : "incomplete");
176 : parrello 1.24 $suffix = " (still in SEED, $complete)";
177 :     } else {
178 :     # It's not in the SEED. See if it has been replaced.
179 :     my ($genus, $species, $strain) = $oldSprout->GetGenomeNameData($genomeID);
180 :     my @genomeIDs = $newSprout->GetGenomeByNameData($genus, $species, $strain);
181 :     if (scalar @genomeIDs) {
182 :     $suffix = " (replaced)";
183 :     }
184 : parrello 1.8 }
185 : parrello 1.24 push @deletedGenomeList, [$deletedGenome->[1], "$genomeID$suffix"];
186 : parrello 1.8 }
187 : parrello 1.1 # Display the lists.
188 : parrello 1.24 push @html, ShowLists($cgi, 'New Genomes' => \@insertedGenomeList,
189 :     'Deleted Genomes' => \@deletedGenomeList);
190 : parrello 1.8 # Now the groups.
191 :     Trace("Comparing groups.") if T(2);
192 :     my %oldGroups = $oldSprout->GetGroups();
193 :     my %newGroups = $newSprout->GetGroups();
194 :     # Loop through the new groups.
195 :     for my $newGroup (sort keys %newGroups) {
196 :     Trace("Processing group $newGroup.") if T(3);
197 :     # Find out if this group is new to this version.
198 :     if (! exists $oldGroups{$newGroup}) {
199 :     # Construct a list of this group's genes.
200 :     my @groupGenomes = NameGenomes($newSprout, $newGroups{$newGroup});
201 : parrello 1.16 push @html, ShowLists($cgi, "Genomes in new group $newGroup" => \@groupGenomes);
202 : parrello 1.8 } else {
203 : parrello 1.21 # Here the group is in both versions. Fix the lists and compare them. Note that we'll be comparing
204 : parrello 1.24 # on the genome ID, which will become the second list element after the call to NameGenomes.
205 : parrello 1.21 my @newGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($newSprout, $newGroups{$newGroup});
206 :     my @oldGroupList = sort { $a->[1] <=> $b->[1] } NameGenomes($oldSprout, $oldGroups{$newGroup});
207 : parrello 1.10 Trace("Comparing lists for $newGroup.") if T(4);
208 : parrello 1.21 my ($insertedGroupGenomes, $deletedGroupGenomes) = Tracer::CompareLists(\@newGroupList, \@oldGroupList, 1);
209 : parrello 1.10 Trace("Comparison complete.") if T(4);
210 : parrello 1.16 # Delete the old group data. When we're done, this means the hash
211 :     # will contain only the deleted groups.
212 : parrello 1.8 delete $oldGroups{$newGroup};
213 :     # Show the lists. Empty lists will not be shown.
214 : parrello 1.10 Trace("Displaying group lists.") if T(4);
215 : parrello 1.16 push @html, ShowLists($cgi, "Genomes new to $newGroup" => $insertedGroupGenomes,
216 :     "Genomes no longer in $newGroup" => $deletedGroupGenomes);
217 : parrello 1.8 }
218 :     }
219 : parrello 1.10 Trace("Processing deleted groups.") if T(4);
220 : parrello 1.8 # Now list the deleted groups.
221 :     for my $oldGroup (sort keys %oldGroups) {
222 :     Trace("Processing deleted group $oldGroup.") if T(3);
223 :     my @groupGenomes = NameGenomes($oldSprout, $oldGroups{$oldGroup});
224 : parrello 1.16 push @html, ShowLists($cgi, "Genomes in deleted group $oldGroup" => \@groupGenomes);
225 : parrello 1.8 }
226 : parrello 1.1 # Next, we get the subsystems.
227 : parrello 1.2 Trace("Processing subsystems.") if T(2);
228 : parrello 1.1 my @oldSubsystems = GetSubsystems($oldSprout);
229 :     my @newSubsystems = GetSubsystems($newSprout);
230 :     # Compare and display the subsystem lists.
231 :     my ($insertedSubs, $deletedSubs) = Tracer::CompareLists(\@newSubsystems, \@oldSubsystems);
232 : parrello 1.8 # Check the deleted subsystems to see if they're in SEED.
233 :     if (scalar @{$deletedSubs} > 0) {
234 :     my %subChecker = map { $_ => 1 } $fig->all_subsystems();
235 :     for my $deletedSub (@{$deletedSubs}) {
236 :     my $subID = $deletedSub->[0];
237 :     if ($subChecker{$subID}) {
238 :     my $trusted = ($fig->usable_subsystem($subID) ? "usable" : "not usable");
239 : parrello 1.9 $deletedSub->[1] .= " (still in SEED, $trusted)";
240 : parrello 1.8 }
241 :     }
242 :     }
243 : parrello 1.16 push @html, ShowLists($cgi, 'New Subsystems' => $insertedSubs,
244 :     'Deleted Subsystems' => $deletedSubs);
245 : parrello 1.23 # Print what we've done so far.
246 :     FlushData(\*OUTPUT, \@html);
247 :     # Now we need to process some statistics that require looping through all the
248 :     # features in the new sprout. While we're at it, we'll collect the BBH and
249 :     # coupling counts.
250 :     my $bbhCount = 0;
251 :     my $couplingCount = 0;
252 : parrello 1.24 # We'll accumulate a report of genomes with missing BBHs in here.
253 :     my @bbhMissingGenomes = ();
254 : parrello 1.23 # One of the reports is only for genomes common to both sprouts. To help us
255 :     # make this determination, we get a hash of the inserted genomes.
256 :     my %skipGenomes = map { $_->[0] => 1 } @{$insertedGenomes};
257 :     # Open the organism report file.
258 :     Open(\*ORGOUT, ">$options->{orgFile}");
259 :     # Start the table.
260 :     my @orgHtml = ();
261 : parrello 1.24 push @orgHtml, $cgi->h4("Genome Report for $version");
262 : parrello 1.23 push @orgHtml, $cgi->start_table({ border => 2, align => 'center'});
263 :     push @orgHtml, $cgi->Tr($cgi->th("Genome"),
264 :     $cgi->th({align => 'right'}, ["Size (bp)", "Feats", "Contigs", "Subs",
265 :     "F In SS", "PEGs", "RNAs", "PPs", "New", "Del"]));
266 :     FlushData(\*ORGOUT, \@orgHtml);
267 : parrello 1.24 # Now we start the loop. Note that "newGenomes" means all the genomes in the new Sprout,
268 :     # not the list of genomes that are new!
269 : parrello 1.23 for my $genomeData (@newGenomes) {
270 :     # Get this genome's ID and name.
271 :     my $genomeID = $genomeData->[0];
272 :     # Create a title for it.
273 :     my $genomeTitle = "$genomeID: $genomeData->[1]";
274 :     # Compute its size.
275 :     my $genomeSize = $newSprout->GenomeLength($genomeID);
276 : parrello 1.24 # Get a list of the genomes in the new Sprout that are not this one.
277 : parrello 1.23 my @otherGenomes = grep { $_ ne $genomeID } map { $_->[0] } @newGenomes;
278 :     Trace("Computing BBH matrix for $genomeID.") if T(3);
279 : parrello 1.24 # Get the bbh matrix going from the current genome to the others.
280 : parrello 1.23 my %matrix = $newSprout->BBHMatrix($genomeID, 1e-20, @otherGenomes);
281 :     # Set up the subsystem hash. This will be used to track which subsystems are used by
282 :     # the genome's features.
283 :     my %subHash = ();
284 :     # Set up the contig hash. This will be used to track which contigs are used by the
285 :     # genome's features.
286 :     my %contigHash = ();
287 :     # Set up a counter hash for feature types.
288 :     my %counters = (peg => 0, in_ss => 0, rna => 0, pp => 0, total => 0);
289 :     # We'll store information about the genome's features (ID and functional role) in here.
290 :     my @newFeatures = ();
291 : parrello 1.24 # Finally, we'll use this flag to warn us if there are no BBHs.
292 :     my $bbhsFound = 0;
293 : parrello 1.23 # Loop through the genome's features. The order is important here, because we need
294 :     # to match the order used by "GetFeatures" for the feature difference comparison.
295 :     Trace("Processing feature statistics for $genomeID.") if T(3);
296 :     my $fquery = $newSprout->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
297 :     [$genomeID]);
298 :     # Loop through the features, updating the counts.
299 :     while (my $feature = $fquery->Fetch()) {
300 :     # Update the total feature count.
301 :     $counters{total}++;
302 :     Trace("$counters{total} features processed for $genomeID.") if T(3) && ($counters{total} % 500 == 0);
303 :     # Get the feature ID and role.
304 :     my $fid = $feature->PrimaryValue('Feature(id)');
305 :     push @newFeatures, [$fid, $feature->PrimaryValue('Feature(assignment)')];
306 :     # Check to see if we have BBH data.
307 :     if (exists $matrix{$fid}) {
308 : parrello 1.24 my $fidBbhCount = scalar keys %{$matrix{$fid}};
309 :     if ($fidBbhCount > 0) {
310 :     # Denote that this feature has BBHs.
311 :     $bbhsFound = 1;
312 :     # Add them to the total BBH count.
313 :     $bbhCount += $fidBbhCount;
314 :     }
315 : parrello 1.23 }
316 :     # Ask for couplings.
317 :     my %coupleHash = $newSprout->CoupledFeatures($fid);
318 :     $couplingCount += keys %coupleHash;
319 :     # See if this feature is in a subsystem.
320 :     my %subs = $newSprout->SubsystemsOf($fid);
321 :     if (keys %subs) {
322 :     $counters{in_ss}++;
323 :     for my $sub (keys %subs) {
324 :     $subHash{$sub} = 1;
325 :     }
326 :     }
327 :     # Increment the feature type counter.
328 :     $counters{$feature->PrimaryValue('Feature(feature-type)')}++;
329 :     # Insure we've tracked this feature's contigs.
330 :     my @locations = split /\s*,\s*/, $feature->PrimaryValue('Feature(location-string)');
331 :     for my $loc (@locations) {
332 :     my $locObject = BasicLocation->new($loc);
333 :     $contigHash{$locObject->Contig} = 1;
334 :     }
335 :     }
336 :     Trace("Feature data compiled for $genomeID.") if T(3);
337 :     # The last thing we need to do is compute the number of features added or deleted.
338 :     # This goes in the genome report, but it's only meaningful for common genomes.
339 :     my ($addCount, $delCount) = ("","");
340 :     if (! $skipGenomes{$genomeID}) {
341 :     # Get the old features.
342 :     my @oldFeatures = GetFeatures($oldSprout, $genomeID);
343 :     Trace("Comparing features for $genomeID.") if T(3);
344 :     # Compare the lists.
345 :     my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);
346 :     $addCount = scalar(@{$insertedFeatures});
347 :     $delCount = scalar(@{$deletedFeatures});
348 :     }
349 : parrello 1.24 # Check to see if this genome is missing its BBHs.
350 :     if (! $bbhsFound) {
351 :     # It is, so add a line for it to the missing-BBH list.
352 :     push @bbhMissingGenomes, ShowDatum($cgi, $genomeData->[1], $genomeID);
353 :     }
354 : parrello 1.23 push @orgHtml, $cgi->Tr($cgi->td($genomeTitle),
355 :     $cgi->td({align => 'right'}, [$genomeSize, $counters{total}, scalar(keys %contigHash),
356 :     scalar(keys %subHash), $counters{in_ss}, $counters{peg},
357 :     $counters{rna}, $counters{pp}, $addCount, $delCount]));
358 :     FlushData(\*ORGOUT, \@orgHtml);
359 :     }
360 :     # Close the table for the genome report.
361 :     push @orgHtml, $cgi->end_table();
362 :     FlushData(\*ORGOUT, \@orgHtml);
363 :     close ORGOUT;
364 : parrello 1.24 # Check for a missing-BBH report.
365 :     if (scalar @bbhMissingGenomes) {
366 :     # There is a report, so put it into the output stream.
367 :     push @html, ShowTitle($cgi, "Genomes without BBHs");
368 :     push @html, @bbhMissingGenomes;
369 :     }
370 :     # Flush the genome feature comparison data and the missing-BBH report (if any).
371 : parrello 1.23 FlushData(\*OUTPUT, \@html);
372 : parrello 1.11 # Next, we show some basic counts.
373 : parrello 1.23 Trace("Displaying counts.") if T(3);
374 : parrello 1.16 push @html, ShowTitle($cgi, "Statistics for old Sprout");
375 :     push @html, ShowCounts($cgi, $oldSprout);
376 :     push @html, ShowTitle($cgi, "Statistics for new Sprout");
377 :     push @html, ShowCounts($cgi, $newSprout);
378 : parrello 1.23 push @html, ShowDatum($cgi, BBHs => $bbhCount);
379 :     push @html, ShowDatum($cgi, "Functional Couplings", $couplingCount);
380 :     FlushData(\*OUTPUT, \@html);
381 : parrello 1.11 # Now we show the genomes that are not in groups but could be. First, we convert
382 :     # our group hash from the new Sprout into the form used on the web site.
383 :     Trace("Examining possible missing genomes in groups.") if T(2);
384 : parrello 1.24 my %fixedGroups = $newSprout->Fix(%newGroups);
385 :     for my $group (sort keys %superTable) {
386 : parrello 1.11 Trace("Checking group $group.");
387 :     # Get this group's genus and species.
388 : parrello 1.24 my $genus = $superTable{$group}->{genus};
389 :     my $species = $superTable{$group}->{species};
390 : parrello 1.11 # Get a hash of the genomes already in it.
391 :     my %inGroup = map { $_ => 1 } @{$fixedGroups{$group}};
392 :     # Get a list of its possible genomes.
393 :     my $filter = 'Genome(genus) = ?';
394 :     my @parms = ($genus);
395 : parrello 1.13 # The species list is tricky because a given group may involve more than
396 : parrello 1.24 # one target species or no target species (which means we want everything).
397 :     # The species names will be in list references, and we use some PERL trickiness
398 :     # to generate an OR filter for them.
399 :     if (scalar @{$species}) {
400 : parrello 1.13 # Create one species filter per species.
401 : parrello 1.24 my @filterClauses = map { 'Genome(species) = ?' } @{$species};
402 : parrello 1.13 # OR the filter clauses together to get a real filter.
403 :     $filter .= " AND (" . (join " OR ", @filterClauses) . ")";
404 :     # Add the specieis names to the SQL parameter list.
405 : parrello 1.24 push @parms, @{$species};
406 : parrello 1.11 }
407 :     my @possibles = $newSprout->GetFlat(['Genome'], $filter, \@parms, 'Genome(id)');
408 :     # Get the possibles that aren't in the group and add identifying information.
409 : parrello 1.12 my @leftOut = NameGenomes($newSprout, [ grep { ! exists $inGroup{$_} } @possibles ]);
410 : parrello 1.11 # If anything survived, show the list.
411 :     if (@leftOut) {
412 : parrello 1.16 push @html, ShowLists($cgi, "Candidates for $group" => \@leftOut);
413 : parrello 1.11 }
414 :     }
415 : parrello 1.23 FlushData(\*OUTPUT, \@html);
416 : parrello 1.16 # Close the table.
417 :     push @html, $cgi->end_table();
418 : parrello 1.23 # Flush the last of the HTML.
419 :     FlushData(\*OUTPUT, \@html);
420 : parrello 1.16 # Close the output file.
421 :     close OUTPUT;
422 :     Trace("Analysis complete.") if T(2);
423 : parrello 1.1 };
424 :     if ($@) {
425 :     Trace("Script failed with error: $@") if T(0);
426 :     $rtype = "error";
427 :     } else {
428 :     Trace("Script complete.") if T(2);
429 :     $rtype = "no error";
430 :     }
431 :     if ($options->{phone}) {
432 : parrello 1.11 my $msgID = Tracer::SendSMS($options->{phone}, "New Stuff Checker terminated with $rtype.");
433 : parrello 1.1 if ($msgID) {
434 :     Trace("Phone message sent with ID $msgID.") if T(2);
435 :     } else {
436 :     Trace("Phone message not sent.") if T(2);
437 :     }
438 :     }
439 :    
440 : parrello 1.23 =head3 FlushData
441 :    
442 : parrello 1.24 FlushData($handle, \@lines);
443 : parrello 1.23
444 :     Write the specified lines to the output file and clear them out of the list. This
445 :     method is called periodically so that even if something goes wrong we can still
446 : parrello 1.24 see the data accumulating in the output file. The key aspect here is that we
447 :     put new-line characters after each line written and show something in the trace
448 :     log.
449 : parrello 1.23
450 :     =over 4
451 :    
452 :     =item handle
453 :    
454 :     Output handle to which the lines should be written.
455 :    
456 :     =item lines
457 :    
458 :     Reference to a list of output lines. The output lines will be written to the output
459 :     handle and then removed from the list.
460 :    
461 :     =back
462 :    
463 :     =cut
464 :    
465 :     sub FlushData {
466 :     # Get the parameters.
467 :     my ($handle, $lines) = @_;
468 :     Trace("Flushing " . scalar(@{$lines}) . " lines to output file.") if T(3);
469 :     # Write the lines.
470 :     print $handle join("\n", @{$lines});
471 :     # Write a terminating new-line.
472 :     print $handle "\n";
473 :     # Clear the list.
474 :     splice @{$lines};
475 :     }
476 :    
477 : parrello 1.8 =head3 GetGenomes
478 : parrello 1.1
479 : parrello 1.24 my @geneList = GetGenomes($sprout);
480 : parrello 1.1
481 :     Return a list of the genomes in the specified Sprout instance. The genomes
482 :     are returned in alphabetical order by genome ID.
483 :    
484 :     =over 4
485 :    
486 :     =item sprout
487 :    
488 :     Sprout instance whose gene list is desired.
489 :    
490 :     =item RETURN
491 :    
492 :     Returns a list of two-tuples. The first element in each tuple is the genome ID,
493 :     and the second is the genome name (genus, species, strain).
494 :    
495 :     =back
496 :    
497 :     =cut
498 :    
499 : parrello 1.8 sub GetGenomes {
500 : parrello 1.1 # Get the parameters.
501 :     my ($sprout) = @_;
502 :     # Get the desired data.
503 :     my @genomes = $sprout->GetAll(['Genome'], "ORDER BY Genome(id)", [], ['Genome(id)',
504 :     'Genome(genus)',
505 :     'Genome(species)',
506 :     'Genome(unique-characterization)']);
507 :     # Create the genome names from the three pieces of the name.
508 :     my @retVal = map { [$_->[0], join(" ", @{$_}[1..3])] } @genomes;
509 :     # Return the result.
510 :     return @retVal;
511 :     }
512 :    
513 : parrello 1.8 =head3 NameGenomes
514 :    
515 : parrello 1.24 my @newList = NameGenomes($sprout, \@genomes);
516 : parrello 1.8
517 :     Convert a list of genome IDs to a list of genome IDs with names.
518 :    
519 :     =over 4
520 :    
521 :     =item sprout
522 :    
523 :     The relevant sprout instance.
524 :    
525 :     =item genomes
526 :    
527 :     Reference to a list of genome IDs
528 :    
529 :     =item RETURN
530 :    
531 :     Returns a list of 2-tuples, each tuple consisting of a genome ID followed by a
532 :     genome name.
533 :    
534 :     =back
535 :    
536 :     =cut
537 :    
538 :     sub NameGenomes {
539 :     # Get the parameters.
540 :     my ($sprout, $genomes) = @_;
541 :     # Attach the names.
542 : parrello 1.16 my @retVal = map { [$sprout->GenusSpecies($_), $_ ] } @{$genomes};
543 : parrello 1.8 # Return the result.
544 :     return @retVal;
545 :     }
546 :    
547 : parrello 1.1 =head3 GetSubsystems
548 :    
549 : parrello 1.24 my @subsystems = GetSubsystems($sprout);
550 : parrello 1.1
551 :     Get a list of the subsystems in the specified Sprout instance.
552 :    
553 :     =over 4
554 :    
555 :     =item sprout
556 :    
557 :     Sprout instance whose subsystems are desired.
558 :    
559 :     =item RETURN
560 :    
561 :     Returns a list of 2-tuples, each consisting of the subsystem name followed by
562 :     the name of the curator.
563 :    
564 :     =back
565 :    
566 :     =cut
567 :    
568 :     sub GetSubsystems {
569 :     # Get the parameters.
570 :     my ($sprout) = @_;
571 :     # Declare the return variable.
572 :     my @retVal = $sprout->GetAll(['Subsystem'], "ORDER BY Subsystem(id)",
573 :     [], ['Subsystem(id)', 'Subsystem(curator)']);
574 :     # Return the result.
575 :     return @retVal;
576 :     }
577 :    
578 : parrello 1.11 =head3 GetProperties
579 :    
580 : parrello 1.24 my @propertyList = GetProperties($sprout);
581 : parrello 1.11
582 :     Return a list of properties. Each element in the list will be a 2-tuple containing
583 :     the property name and value in the first column and its ID in the second column.
584 :    
585 :     =over 4
586 :    
587 :     =item sprout
588 :    
589 :     Sprout instance to be used to retrieve the properties.
590 :    
591 :     =item RETURN
592 :    
593 :     Returns a list of 2-tuples. The first element in each 2-tuple will be a string
594 :     in the form of an assignment of the property value to the property name. The second
595 :     element will be the number of features possessing the property. The list will be
596 :     sorted in ascending alphabetical order.
597 :    
598 :     =back
599 :    
600 :     =cut
601 :    
602 :     sub GetProperties {
603 :     # Get the parameters.
604 :     my ($sprout) = @_;
605 :     # Get the properties.
606 :     my @props = $sprout->GetAll(['Property'],
607 :     "ORDER BY Property(property-name), Property(property-value)", [],
608 :     ['Property(property-name)', 'Property(property-value)', 'Property(id)']);
609 :     # Combine the property names and values and replace each property ID by a feature count.
610 :     my @retVal;
611 :     for my $propItem (@props) {
612 : parrello 1.19 # Split up the value on punctuation boundaries for readability.
613 :     my $propValue = $propItem->[1];
614 :     $propValue =~ s/::/ :: /g;
615 :     $propValue =~ s/([,;])(\S)/$1 $2/g;
616 :     my $label = $propItem->[0] . " = " . $propValue;
617 : parrello 1.11 my $count = $sprout->GetCount(['Feature', 'HasProperty'], "HasProperty(to-link) = ?",
618 :     [$propItem->[2]]);
619 :     push @retVal, [$label, $count];
620 :     }
621 :     # Return the result.
622 :     return @retVal;
623 :     }
624 :    
625 : parrello 1.1 =head3 GetFeatures
626 :    
627 : parrello 1.24 my @features = GetFeatures($sprout, $genomeID);
628 : parrello 1.1
629 :     Return the features of the specified genome in the specified Sprout instance.
630 :    
631 :     =over 4
632 :    
633 :     =item sprout
634 :    
635 :     Sprout instance to use to get the features.
636 :    
637 :     =item genomeID
638 :    
639 :     ID of the genome in question.
640 :    
641 :     =item RETURN
642 :    
643 :     Returns a list of 2-tuples, the first element being the feature ID and the second its
644 :     functional assignment (if any).
645 :    
646 :     =back
647 :    
648 :     =cut
649 :    
650 :     sub GetFeatures {
651 :     # Get the parameters.
652 :     my ($sprout, $genomeID) = @_;
653 :     # Get a list of the feature IDs and map them to their functional assignments.
654 :     my @retVal = map { [$_, $sprout->FunctionOf($_)] } $sprout->GetFlat(['HasFeature'],
655 :     "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
656 :     [$genomeID], 'HasFeature(to-link)');
657 :     # Return the result.
658 :     return @retVal;
659 :     }
660 :    
661 :     =head3 ShowLists
662 :    
663 : parrello 1.24 my @htmlLines = ShowLists($cgi, %lists);
664 : parrello 1.1
665 : parrello 1.16 Display a set of lists. Each list should consist of 2-tuples, and the list
666 :     entries will be displayed as 2-element table rows with a header row.
667 : parrello 1.1
668 :     =over 4
669 :    
670 : parrello 1.16 =item cgi
671 : parrello 1.2
672 : parrello 1.16 A CGI query object containing the options for this program. It is also used to format
673 : parrello 1.24 HTML.
674 : parrello 1.2
675 : parrello 1.1 =item lists
676 :    
677 :     A hash mapping list names to list references.
678 :    
679 : parrello 1.16 =item RETURN
680 :    
681 :     Returns a list of HTML lines displaying the list in tabular form.
682 :    
683 : parrello 1.15 =back
684 :    
685 : parrello 1.1 =cut
686 :    
687 :     sub ShowLists {
688 :     # Get the parameters.
689 : parrello 1.16 my $cgi = shift @_;
690 : parrello 1.1 my %lists = @_;
691 : parrello 1.16 # Declare the return variable. The HTML lines will be accumulated
692 :     # in here and then joined with new-lines.
693 :     my @retVal = ();
694 : parrello 1.1 # Loop through the lists in alphabetical order by list name.
695 : parrello 1.6 for my $listName (sort keys %lists) {
696 : parrello 1.1 # Get the list itself.
697 :     my $list = $lists{$listName};
698 :     # Get the number of list items.
699 :     my $listSize = scalar @{$list};
700 : parrello 1.6 # Only proceed if the list is nonempty.
701 :     if ($listSize > 0) {
702 : parrello 1.16 my $header = ComputeHeader($listName, $listSize);
703 : parrello 1.6 Trace($header) if T(3);
704 : parrello 1.16 # Display the header line as a header.
705 :     push @retVal, ShowTitle($cgi, $header);
706 : parrello 1.17 # Now display the list as table rows. Note we convert underbars to spaces
707 :     # in the name row to make the table easier to fit into narrow places.
708 : parrello 1.16 for my $entry (@{$list}) {
709 :     my ($name, $data) = @{$entry};
710 : parrello 1.18 $name =~ tr/_/ /;
711 : parrello 1.23 push @retVal, ShowDatum($cgi, $name => $data);
712 : parrello 1.1 }
713 :     }
714 :     }
715 : parrello 1.16 # Return the list of HTML lines.
716 :     return @retVal;
717 : parrello 1.1 }
718 :    
719 : parrello 1.16 =head3 ComputeHeader
720 : parrello 1.7
721 : parrello 1.24 my $header = ComputeHeader($name, $count);
722 : parrello 1.7
723 :     Return a list header for a list of the specified length.
724 :    
725 :     =over 4
726 :    
727 :     =item name
728 :    
729 :     Name of the list.
730 :    
731 :     =item count
732 :    
733 :     Number of entries in the list.
734 :    
735 :     =item RETURN
736 :    
737 :     Returns a list header that shows the name of the list and the number of entries.
738 :    
739 :     =back
740 :    
741 :     =cut
742 :    
743 : parrello 1.16 sub ComputeHeader {
744 : parrello 1.7 # Get the parameters.
745 :     my ($name, $count) = @_;
746 :     # Declare the return variable.
747 : parrello 1.15 my $retVal;
748 : parrello 1.7 if ($count == 0) {
749 : parrello 1.16 $retVal = "$name: none";
750 : parrello 1.7 } elsif ($count == 1) {
751 : parrello 1.16 $retVal = "$name: one";
752 : parrello 1.7 } else {
753 : parrello 1.16 $retVal = "$name: $count";
754 : parrello 1.7 }
755 :     # Return the result.
756 :     return $retVal;
757 :     }
758 :    
759 : parrello 1.11 =head3 ShowCounts
760 :    
761 : parrello 1.24 ShowCounts($sprout);
762 : parrello 1.11
763 :     Display general counts for the specified sprout instance. These counts are
764 :     used in progress reports.
765 :    
766 :     =over 4
767 :    
768 : parrello 1.16 =item cgi
769 :    
770 :     CGI query object used to format the output.
771 :    
772 : parrello 1.11 =item sprout
773 :    
774 :     Sprout instance for which counts are to be produced.
775 :    
776 : parrello 1.16 =item RETURN
777 :    
778 :     Returns a list of HTML lines with the counts arranged in table rows.
779 :    
780 : parrello 1.11 =back
781 :    
782 :     =cut
783 :    
784 :     sub ShowCounts {
785 :     # Get the parameters.
786 : parrello 1.16 my ($cgi, $sprout) = @_;
787 : parrello 1.11 # Count genomes and subsystems.
788 :     my $genomes = $sprout->GetCount(['Genome']);
789 :     my $subsystems = $sprout->GetCount(['Subsystem']);
790 : parrello 1.23 # Count roles and external functional assignments.
791 : parrello 1.11 my $roles = $sprout->GetCount(['OccursInSubsystem']);
792 :     my $funcs = $sprout->GetCount(['ExternalAliasFunc']);
793 : parrello 1.16 # Count features.
794 : parrello 1.11 my $features = $sprout->GetCount(['Feature']);
795 :     # Display the counts.
796 : parrello 1.16 my @retVal = ();
797 : parrello 1.23 push @retVal, ShowDatum($cgi, Genomes => $genomes);
798 :     push @retVal, ShowDatum($cgi, Subsystems => $subsystems);
799 :     push @retVal, ShowDatum($cgi, Roles => $roles);
800 :     push @retVal, ShowDatum($cgi, 'External function assignments', $funcs);
801 :     push @retVal, ShowDatum($cgi, Features => $features);
802 : parrello 1.16 # Return the html.
803 :     return @retVal;
804 :     }
805 :    
806 : parrello 1.23 =head3 ShowDatum
807 :    
808 : parrello 1.24 my $htmlText = ShowDatum($cgi, $label, $value);
809 : parrello 1.23
810 :     Return a table row displaying the specified label and value.
811 :    
812 :     =over 4
813 :    
814 :     =item cgi
815 :    
816 :     CGI query object used to generate the HTML text.
817 :    
818 :     =item label
819 :    
820 :     Label to appear in the left cell of the table row.
821 :    
822 :     =item value
823 :    
824 :     Value to appear in the right cell of the table row.
825 :    
826 :     =item RETURN
827 :    
828 :     Returns the HTML for a single table row with the last cell right-aligned.
829 :    
830 :     =back
831 :    
832 :     =cut
833 :    
834 :     sub ShowDatum {
835 :     # Get the parameters.
836 :     my ($cgi, $label, $value) = @_;
837 :     # Create the table row.
838 :     my $retVal = $cgi->Tr($cgi->td($label), $cgi->td({align => 'right'}, $value));
839 :     # Return it.
840 :     return $retVal;
841 :     }
842 :    
843 : parrello 1.16 =head3 ShowTitle
844 :    
845 : parrello 1.24 my $html = ShowTitle($cgi, $title);
846 : parrello 1.16
847 :     Display a title line. This will be a merged table row with bolded text.
848 :    
849 :     =over 4
850 :    
851 :     =item cgi
852 :    
853 :     CGI query object used to generate HTML output.
854 :    
855 :     =item RETURN
856 :    
857 :     Returns the HTML text.
858 :    
859 :     =back
860 :    
861 :     =cut
862 :    
863 :     sub ShowTitle {
864 :     # Get the parameters.
865 :     my ($cgi, $title) = @_;
866 :     # Declare the return variable.
867 :     my $retVal = $cgi->Tr($cgi->th({colspan => 2, align => "center"}, $title));
868 :     # Return the result.
869 :     return $retVal;
870 : parrello 1.11 }
871 : parrello 1.16
872 : parrello 1.24 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3