[Bio] / Sprout / NmpdrCheck.pl Repository:
ViewVC logotype

Diff of /Sprout/NmpdrCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Wed Oct 15 11:43:40 2008 UTC revision 1.2, Mon Jan 19 21:57:23 2009 UTC
# Line 40  Line 40 
40  Specifies the tracing level. The higher the tracing level, the more messages  Specifies the tracing level. The higher the tracing level, the more messages
41  will appear in the trace log. Use E to specify emergency tracing.  will appear in the trace log. Use E to specify emergency tracing.
42    
 =item diagrams  
   
 This option lists all the subsystem diagrams, along with an indication of which  
 are new and which are old.  
   
43  =item subsystems  =item subsystems
44    
45  This option lists all the SEED subsystems, indicating which are in the Sprout  This option lists all the SEED subsystems, indicating which are in the Sprout
46  and which are marked for NMPDR but not yet in the Sprout, and what the status is  and which are marked for NMPDR but not yet in the Sprout, and what the status is
47  of each diagram.  of each diagram.
48    
49    =item bbhs
50    
51    This option lists all of the NMPDR genomes, along with the number of BBHs
52    available for each. This is useful for determining whether or not BBHs
53    exist for all genomes.
54    
55    =item attrCheck
56    
57    This option loops through the NMPDR genomes defined in the SEED,
58    looking for the presence of special attributes. This is useful for
59    verifying the accuracy of a load.
60    
61  =item user  =item user
62    
63  Name suffix to be used for log files. If omitted, the PID is used.  Name suffix to be used for log files. If omitted, the PID is used.
# Line 88  Line 95 
95  # this constant.  # this constant.
96  my %OptionMap = (  my %OptionMap = (
97                   subsystems => 'CheckSubsystems',                   subsystems => 'CheckSubsystems',
98                     bbhs => 'CheckBBHs',
99                     attrCheck => 'CheckSeedAttrs'
100                  );                  );
101  # Get the command-line options and parameters.  # Get the command-line options and parameters.
102  my ($options, @parameters) = StandardSetup([qw(SproutSubsys Sprout) ],  my ($options, @parameters) = StandardSetup([qw(SproutSubsys Sprout) ],
103                                             {                                             {
104                                                trace => ["2", "tracing level"],                                                trace => ["2", "tracing level"],
105                                                subsystems => ["", "if specified, will verify the subsystem list"],                                                subsystems => ["", "if specified, will verify the subsystem list"],
106                                                  attrCheck => ["", "if specified, will display attribute data for NMPDR genomes in the SEED"],
107                                                  bbhs => ["", "if specified, will verify the BBHs on the BBH server"],
108                                                phone => ["", "phone number (international format) to call when load finishes"]                                                phone => ["", "phone number (international format) to call when load finishes"]
109                                             },                                             },
110                                             "",                                             "",
# Line 237  Line 248 
248      return $retVal;      return $retVal;
249  }  }
250    
251    =head3 CheckBBHs
252    
253        my $stats = CheckBBHs($fig, $sfx);
254    
255    Loop through all of the Sprout genomes, listing their BBH count.
256    
257    =over 4
258    
259    =item fig
260    
261    [[FigPm]] object for accessing the SEED data store.
262    
263    =item sfx
264    
265    [[SFXlatePm]] object for accessing the NMPDR database.
266    
267    =item RETURN
268    
269    Returns a statistics object with a summary of what happened.
270    
271    =back
272    
273    =cut
274    
275    sub CheckBBHs {
276        my ($fig, $sfx) = @_;
277        # Create the statistics object to return to the caller.
278        my $retVal = Stats->new();
279        # Get the list of genomes.
280        my @genomes = $sfx->all_genomes();
281        # Map each display name (genus/species plus the bracketed genome ID) to its genome ID.
282        my %genomeNames = ();
283        for my $genome (@genomes) {
284            my $name = $sfx->genus_species($genome) . " [$genome]";
285            $genomeNames{$name} = $genome;
286        }
287        # Process the genomes in name order.
288        for my $name (sort keys %genomeNames) {
289            my $genome = $genomeNames{$name};
290            # Count this genome's BBHs.
291            my $count = FIGRules::BatchBBHs("fig|$genome.%", 1e-10);
292            # A count of 0 is bad.
293            if (! $count) {
294                Trace("$name has no BBHs. ***") if T(1);
295                $retVal->Add(badGenomes => 1);
296            } else {
297                Trace("$name BBH count is $count.") if T(3);
298                $retVal->Add(bbhCount => $count);
299            }
300            $retVal->Add(genomes => 1);
301        }
302        # Report how many of the processed genomes had no BBHs.
303        Trace($retVal->Ask('badGenomes') . " out of " . $retVal->Ask('genomes') .
304              " genomes had no BBHs.") if T(2);
305        # Return the stats.
306        return $retVal;
307    }
308    
309    =head3 CheckSeedAttrs
310    
311        my $stats = CheckSeedAttrs($fig, $sfx);
312    
313    Loop through all of the SEED genomes marked for the NMPDR,
314    listing their special attributes.
315    
316    =over 4
317    
318    =item fig
319    
320    [[FigPm]] object for accessing the SEED data store.
321    
322    =item sfx
323    
324    [[SFXlatePm]] object for accessing the NMPDR database.
325    
326    =item RETURN
327    
328    Returns a statistics object with a summary of what happened.
329    
330    =back
331    
332    =cut
333    
334    sub CheckSeedAttrs {
335        my ($fig, $sfx) = @_;
336        # This table maps each statistic name to the pair of values passed as the second and third arguments of get_attributes.
337        my %attrTable = (
338            CDD =>      ['CDD',     undef],
339            PSORT =>    ['PSORT',   undef],
340            Phobius =>  ['Phobius', undef],
341            IEDB =>     ['iedb%',   undef],
342            essential =>[undef,     'essential'],
343            virulent => ['virulen%',undef],
344        );
345        # Get a statistics object to return to the caller.
346        my $retVal = Stats->new();
347        # Get all the NMPDR genomes.
348        my @genomes = $fig->genomes(1);
349        Trace(scalar(@genomes) . " genomes found.") if T(2);
350        for my $genome (@genomes) {
351            # Create a stats object for this genome.
352            my $stats = Stats->new(keys %attrTable);
353            # Look for this genome's attributes.
354            for my $attr (keys %attrTable) {
355                my @results = $fig->get_attributes("fig|$genome.%",
356                                                   $attrTable{$attr}[0],
357                                                   $attrTable{$attr}[1]);
358                # Record the attribute count.
359                $stats->Add($attr => scalar(@results));
360                # Record this test.
361                $retVal->Add(queries => 1);
362            }
363            # Get the genome's name.
364            my $name = $fig->genus_species($genome);
365            # Display its statistics.
366            Trace("Results for $name [$genome]: " . $stats->Display()) if T(2);
367            # Roll them into the main statistics.
368            $retVal->Accumulate($stats);
369            $retVal->Add(genomes => 1);
370        }
371        # Return the statistics.
372        return $retVal;
373    }
374    
375  1;  1;

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.2

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3