[Bio] / Sprout / NmpdrCheck.pl Repository:
ViewVC logotype

Annotation of /Sprout/NmpdrCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     use strict;
21 :     use Tracer;
22 :     use FIG;
23 :     use SFXlate;
24 :     use Sprout;
25 :     use Stats;
26 :    
27 :     =head1 NmpdrCheck Script
28 :    
29 :     =head2 Introduction
30 :    
31 :     This script performs useful NMPDR validation functions. The various command-line
32 :     options direct it to perform basic tests of the NMPDR data.
33 :    
34 :     =head2 Command-Line Options
35 :    
36 :     =over 4
37 :    
38 :     =item trace
39 :    
40 :     Specifies the tracing level. The higher the tracing level, the more messages
41 :     will appear in the trace log. Use E to specify emergency tracing.
42 :    
43 :     =item subsystems
44 :    
45 :     This option lists all the SEED subsystems, indicating which are in the Sprout
46 :     and which are marked for NMPDR but not yet in the Sprout, and what the status is
47 :     of each diagram.
48 :    
49 : parrello 1.2 =item bbhs
50 :    
51 :     This option lists all of the NMPDR genomes, along with the number of BBHs
52 :     available for each. This is useful for determining whether or not BBHs
53 :     exist for all genomes.
54 :    
55 :     =item attrCheck
56 :    
57 :     This option loops through the NMPDR genomes defined in the SEED,
58 :     looking for the presence of special attributes. This is useful for
59 :     verifying the accuracy of a load.
60 :    
61 : parrello 1.1 =item user
62 :    
63 :     Name suffix to be used for log files. If omitted, the PID is used.
64 :    
65 :     =item sql
66 :    
67 :     If specified, turns on tracing of SQL activity.
68 :    
69 :     =item background
70 :    
71 :     Save the standard and error output to files. The files will be created
72 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
73 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
74 :     B<user> option above.
75 :    
76 :     =item help
77 :    
78 :     Display this command's parameters and options.
79 :    
80 :     =item warn
81 :    
82 :     Create an event in the RSS feed when an error occurs.
83 :    
84 :     =item phone
85 :    
86 :     Phone number to message when the script is complete.
87 :    
88 :     =back
89 :    
90 :     =cut
91 :    
# This hash maps option names to subroutine names. To add an option, you must
# create a subroutine to process it, add POD documentation above, add a summary
# to the StandardSetup call below, and map its name to the subroutine name in
# this constant.
my %OptionMap = (
    subsystems => 'CheckSubsystems',
    bbhs       => 'CheckBBHs',
    attrCheck  => 'CheckSeedAttrs',
);
# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw(SproutSubsys Sprout) ],
        {
            trace      => ["2", "tracing level"],
            subsystems => ["", "if specified, will verify the subsystem list"],
            attrCheck  => ["", "if specified, will display attribute data for NMPDR genomes in the SEED"],
            bbhs       => ["", "if specified, will verify the BBHs on the BBH server"],
            phone      => ["", "phone number (international format) to call when load finishes"]
        },
        "",
        @ARGV);
# Set a variable to contain return type information.
my $rtype;
# Ensure we catch errors. The block returns 1 on success, so a false result
# from the eval means it died, even if $@ was altered while unwinding.
my $scriptOK = eval {
    # Create the FIG object.
    my $fig = FIG->new();
    # Create the SFXlate object.
    my $sfx = SFXlate->new();
    # Create the statistics object. It accumulates the results of every check.
    my $stats = Stats->new();
    # Display version information.
    Trace("NMPDR version $FIG_Config::version type $FIG_Config::nmpdr_site_type.") if T(2);
    # Process according to the options specified. The options are processed
    # in sorted order so the sequence of checks is the same on every run.
    for my $option (sort keys %OptionMap) {
        # Is this option specified?
        if ($options->{$option}) {
            $stats->Add(checks => 1);
            # Call the check subroutine. The name is resolved to a code
            # reference and invoked inside a block eval, so no Perl source
            # text has to be built and compiled at run time. A failure in
            # one check is recorded as a crash and does not stop the others.
            my $newStats;
            my $checkOK = eval {
                my $handler = __PACKAGE__->can($OptionMap{$option})
                    or die "No subroutine named $OptionMap{$option} is defined.\n";
                $newStats = $handler->($fig, $sfx);
                1;
            };
            if (! $checkOK) {
                my $error = $@ || "unknown error";
                Trace("Error in $option check: $error") if T(0);
                $stats->Add(crashes => 1);
            } else {
                # Display the statistics.
                Trace("Statistics for $option check:\n" . $newStats->Show()) if T(2);
                # Roll up the statistics.
                $stats->Accumulate($newStats);
            }
        }
    }
    # If there was an error, or more than one test, display the rolled statistics.
    if ($stats->Ask('checks') > 1 || $stats->Ask('crashes')) {
        Trace("Summary statistics for all checks:\n" . $stats->Show()) if T(2);
    }
    1;
};
if (! $scriptOK) {
    my $error = $@ || "unknown error";
    Trace("Script failed with error: $error") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# If a phone number was supplied, report the outcome by text message.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "NmpdrCheck terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}
162 :    
=head3 CheckSubsystems

    my $stats = CheckSubsystems($fig, $sfx);

Loop through all of the SEED subsystems, listing their NMPDR status and
enumerating the diagrams.

=over 4

=item fig

[[FigPm]] object for accessing the SEED data store.

=item sfx

[[SFXlatePm]] object for accessing the NMPDR database.

=item RETURN

Returns a statistics object with a summary of what happened.

=back

=cut

sub CheckSubsystems {
    # Get the parameters.
    my ($fig, $sfx) = @_;
    # This table converts the two facts we know about a subsystem into a
    # statistics counter and a report line. The key is a two-character
    # string: the first character is 1 if the SEED marks the subsystem for
    # NMPDR, and the second is 1 if the subsystem is in the NMPDR database.
    my %verdictFor = (
        '11' => ['nmpdrPure',
                 "Subsystem is marked NMPDR and is found in the NMPDR database."],
        '01' => ['nmpdrDeleting',
                 "Subsystem is not marked NMPDR and is found in the NMPDR database."],
        '10' => ['nmpdrInserting',
                 "Subsystem is marked NMPDR and is not found in the NMPDR database."],
        '00' => ['nmpdrExcluded',
                 "Subsystem is not marked NMPDR and is not found in the NMPDR database."],
    );
    # Create the statistics object, with the four status counters declared
    # up front.
    my $retVal = Stats->new(qw(nmpdrPure nmpdrInserting nmpdrDeleting nmpdrExcluded));
    # Build lookup sets of the subsystem names known to each side.
    my %seedSubs;
    $seedSubs{$_} = 1 for $fig->all_subsystems();
    my %nmpdrSubs;
    $nmpdrSubs{$_} = 1 for $sfx->all_subsystems();
    # Pass 1: report on every subsystem the SEED knows about.
    for my $subName (sort keys %seedSubs) {
        Trace("Processing SEED subsystem $subName.") if T(3);
        # Classify the subsystem and start its report with the status line.
        my $marked = $fig->nmpdr_subsystem($subName);
        my $flags = ($marked ? '1' : '0') . ($nmpdrSubs{$subName} ? '1' : '0');
        my ($status, $message) = @{ $verdictFor{$flags} };
        my @report = ($message);
        $retVal->Add($status => 1);
        # Load the SEED subsystem object so we can look at its diagrams.
        Trace("Retrieving subsystem object for $subName.") if T(3);
        my $subsystem = $fig->get_subsystem($subName);
        if ($subsystem) {
            $retVal->Add(seedSubFound => 1);
            # Each diagram descriptor is an array reference whose first
            # element is the diagram ID.
            for my $diagram ($subsystem->get_diagrams()) {
                $retVal->Add(subDiagram => 1);
                my $diagramID = $diagram->[0];
                my $format = 'old';
                $format = 'new' if $subsystem->is_new_diagram($diagramID);
                # Counts go into subDiagramNew or subDiagramOld.
                $retVal->Add("subDiagram" . ucfirst($format), 1);
                push @report, "Diagram $diagramID is $format format.";
            }
        } else {
            push @report, "ERROR: Subsystem object cannot be created.";
            $retVal->Add(seedSubError => 1);
        }
        # Emit the whole report for this subsystem as one trace message.
        Trace(join("\n ", "Results for $subName", @report)) if T(2);
    }
    # Pass 2: anything in the NMPDR database that the SEED no longer lists
    # has been deleted from the SEED.
    for my $subName (grep { ! $seedSubs{$_} } sort keys %nmpdrSubs) {
        Trace("Subsystem $subName is in NMPDR but deleted from SEED.") if T(2);
        $retVal->Add(nmpdrDeleted => 1);
    }
    # Return the statistics object.
    return $retVal;
}
250 :    
=head3 CheckBBHs

    my $stats = CheckBBHs($fig, $sfx);

Loop through all of the Sprout genomes, listing their BBH count.

=over 4

=item fig

[[FigPm]] object for accessing the SEED data store.

=item sfx

[[SFXlatePm]] object for accessing the NMPDR database.

=item RETURN

Returns a statistics object with a summary of what happened. The
C<genomes> counter is the number of genomes examined, C<badGenomes> is the
number for which no BBHs were found, and C<bbhCount> is the total of the
counts for the remaining genomes.

=back

=cut

sub CheckBBHs {
    my ($fig, $sfx) = @_;
    # This script does not load FIGRules at the top, so load it here rather
    # than relying on some other module having pulled it in already. A
    # require of an already-loaded module does nothing.
    require FIGRules;
    # Create the statistics object to return to the caller.
    my $retVal = Stats->new();
    # Get the list of genomes.
    my @genomes = $sfx->all_genomes();
    # Get the genome names. Each display name includes the genome ID, and the
    # hash maps the display name back to the ID.
    my %genomeNames = ();
    for my $genome (@genomes) {
        my $name = $sfx->genus_species($genome) . " [$genome]";
        $genomeNames{$name} = $genome;
    }
    # Process the genomes in name order.
    for my $name (sort keys %genomeNames) {
        my $genome = $genomeNames{$name};
        # Count this genome's BBHs. The pattern covers every feature ID that
        # begins with the genome's ID.
        # NOTE(review): this assumes BatchBBHs yields a count when called in
        # scalar context -- confirm against FIGRules.
        my $count = FIGRules::BatchBBHs("fig|$genome.%", 1e-10);
        # A count of 0 is bad.
        if (! $count) {
            Trace("$name has no BBHs. ***") if T(1);
            $retVal->Add(badGenomes => 1);
        } else {
            Trace("$name BBH count is $count.") if T(3);
            $retVal->Add(bbhCount => $count);
        }
        $retVal->Add(genomes => 1);
    }
    # Tell the user how bad things are.
    Trace($retVal->Ask('badGenomes') . " out of " . $retVal->Ask('genomes') .
          " genomes had no BBHs.") if T(2);
    # Return the stats.
    return $retVal;
}
308 :    
=head3 CheckSeedAttrs

    my $stats = CheckSeedAttrs($fig, $sfx);

Loop through all of the SEED genomes marked for the NMPDR,
listing their special attributes.

=over 4

=item fig

[[FigPm]] object for accessing the SEED data store.

=item sfx

[[SFXlatePm]] object for accessing the NMPDR database.

=item RETURN

Returns a statistics object with a summary of what happened.

=back

=cut

sub CheckSeedAttrs {
    my ($fig, $sfx) = @_;
    # Each entry names a statistic and gives the second and third arguments
    # to pass to get_attributes when counting it.
    # NOTE(review): presumably these are an attribute key pattern and an
    # attribute value pattern, with undef meaning "any" -- confirm against
    # FIG::get_attributes.
    my %searchFor = (
        CDD       => ['CDD',      undef],
        PSORT     => ['PSORT',    undef],
        Phobius   => ['Phobius',  undef],
        IEDB      => ['iedb%',    undef],
        essential => [undef,      'essential'],
        virulent  => ['virulen%', undef],
    );
    # This will hold the totals across all genomes.
    my $retVal = Stats->new();
    # Ask the SEED for the genomes to check.
    my @genomeIDs = $fig->genomes(1);
    Trace(scalar(@genomeIDs) . " genomes found.") if T(2);
    foreach my $genomeID (@genomeIDs) {
        # Give this genome its own statistics object, with one counter for
        # each attribute type.
        my @attrNames = keys %searchFor;
        my $genomeStats = Stats->new(@attrNames);
        # Run one attribute query per table entry against all the features
        # whose IDs begin with this genome's ID.
        foreach my $attrName (@attrNames) {
            my ($keyPattern, $valuePattern) = @{ $searchFor{$attrName} };
            my @hits = $fig->get_attributes("fig|$genomeID.%", $keyPattern, $valuePattern);
            # The number of rows returned is the attribute count.
            $genomeStats->Add($attrName => scalar(@hits));
            # Count the query itself in the overall totals.
            $retVal->Add(queries => 1);
        }
        # Show the genome's results under its name and ID.
        my $genomeName = $fig->genus_species($genomeID);
        Trace("Results for $genomeName [$genomeID]: " . $genomeStats->Display()) if T(2);
        # Fold the genome's counts into the overall totals.
        $retVal->Accumulate($genomeStats);
        $retVal->Add(genomes => 1);
    }
    # Hand the totals back to the caller.
    return $retVal;
}
374 :    
375 : parrello 1.1 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3