[Bio] / Sprout / NmpdrCheck.pl Repository:
ViewVC logotype

View of /Sprout/NmpdrCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Mon Jan 19 21:57:23 2009 UTC (10 years, 7 months ago) by parrello
Branch: MAIN
CVS Tags: rast_rel_2009_05_18, rast_rel_2009_02_05, rast_rel_2009_03_26
Changes since 1.1: +140 -5 lines
Improved site maintenance utilities.

#!/usr/bin/perl -w

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

use strict;
use Tracer;
use FIG;
use SFXlate;
use Sprout;
use Stats;

=head1 NmpdrCheck Script

=head2 Introduction

This script performs useful NMPDR validation functions. The various command-line
options direct it to perform basic tests of the NMPDR data.

=head2 Command-Line Options

=over 4

=item trace

Specifies the tracing level. The higher the tracing level, the more messages
will appear in the trace log. Use E to specify emergency tracing.

=item subsystems

This option lists all the SEED subsystems, indicating which are in the Sprout
and which are marked for NMPDR but not yet in the Sprout, and what the status is
of each diagram.

=item bbhs

This option lists all of the NMPDR genomes, along with the number of BBHs
available for each. This is useful for determining whether or not BBHs
exist for all genomes.

=item attrCheck

This option loops through the NMPDR genomes defined in the SEED,
looking for the presence of special attributes. This is useful for
verifying the accuracy of a load.

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item help

Display this command's parameters and options.

=item warn

Create an event in the RSS feed when an error occurs.

=item phone

Phone number to message when the script is complete.

=back

=cut

# Dispatch table for the checks: each key is a command-line option name and
# each value is the name of the subroutine in this file that performs the
# check. Adding a new check takes four steps: write the subroutine, document
# the option in the POD above, describe it in the StandardSetup call below,
# and register it here.
my %OptionMap = (
    attrCheck  => 'CheckSeedAttrs',
    bbhs       => 'CheckBBHs',
    subsystems => 'CheckSubsystems',
);
# Get the command-line options and parameters. Each entry in the option hash
# maps an option name to a two-element list: its default value and its help
# text. The options documented above but not listed here (user, sql,
# background, help, warn) are presumably supplied by StandardSetup itself --
# NOTE(review): confirm against Tracer::StandardSetup.
my ($options, @parameters) = StandardSetup([qw(SproutSubsys Sprout) ],
                                           {
                                              trace => ["2", "tracing level"],
                                              subsystems => ["", "if specified, will verify the subsystem list"],
                                              attrCheck => ["", "if specified, will display attribute data for NMPDR genomes in the SEED"],
                                              bbhs => ["", "if specified, will verify the BBHs on the BBH server"],
                                              # This script runs checks rather than a load, and it sends a text
                                              # message rather than placing a call, so the help text says so.
                                              phone => ["", "phone number (international format) to message when the checks finish"]
                                           },
                                           "",
                                           @ARGV);
# Holds the outcome description ("error" or "no error") that is placed in the
# completion message sent to the phone.
my $rtype;
# Ensure we catch errors: anything that dies inside this block is reported
# below instead of aborting the script.
eval {
    # Create the FIG object.
    my $fig = FIG->new();
    # Create the SFXlate object.
    my $sfx = SFXlate->new();
    # Create the statistics object used to roll up the results of every check.
    my $stats = Stats->new();
    # Display version information.
    Trace("NMPDR version $FIG_Config::version type $FIG_Config::nmpdr_site_type.") if T(2);
    # Process according to the options specified. The option names are sorted
    # so the checks always run in the same order.
    for my $option (sort keys %OptionMap) {
        # Skip any check that was not requested on the command line.
        next if ! $options->{$option};
        $stats->Add(checks => 1);
        # Resolve the handler by name into a code reference. This replaces a
        # string eval of generated source text: no code is compiled at run
        # time, and a name with no matching subroutine is detected explicitly.
        my $handlerName = $OptionMap{$option};
        my $handler = __PACKAGE__->can($handlerName);
        # Run the check inside its own eval so that one failing check does
        # not prevent the remaining checks from running.
        my $newStats = eval {
            die "No subroutine named $handlerName exists for option $option.\n"
                if ! $handler;
            $handler->($fig, $sfx);
        };
        if ($@) {
            Trace("Error in $option check: $@") if T(0);
            $stats->Add(crashes => 1);
        } else {
            # Display the statistics.
            Trace("Statistics for $option check:\n" . $newStats->Show()) if T(2);
            # Roll up the statistics.
            $stats->Accumulate($newStats);
        }
    }
    # If there was an error, or more than one test, display the rolled statistics.
    if ($stats->Ask('checks') > 1 || $stats->Ask('crashes')) {
        Trace("Summary statistics for all checks:\n" . $stats->Show()) if T(2);
    }
};
# NOTE(review): a crash inside an individual check is trapped above and only
# counted, so the outcome is still "no error" in that case. Only a failure
# outside the per-check eval reaches this test. Confirm that is intended.
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# Send the completion message if a phone number was supplied.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "NmpdrCheck terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}

=head3 CheckSubsystems

    my $stats = CheckSubsystems($fig, $sfx);

Loop through all of the SEED subsystems, listing their NMPDR status and
enumerating the diagrams.

=over 4

=item fig

[[FigPm]] object for accessing the SEED data store.

=item sfx

[[SFXlatePm]] object for accessing the NMPDR database.

=item RETURN

Returns a statistics object with a summary of what happened.

=back

=cut

sub CheckSubsystems {
    # Unpack the SEED ($fig) and NMPDR ($sfx) access objects.
    my ($fig, $sfx) = @_;
    # Statistics object returned to the caller. The four status counters are
    # named up front.
    my $retVal = Stats->new('nmpdrPure', 'nmpdrInserting', 'nmpdrDeleting', 'nmpdrExcluded');
    # Build lookup hashes of the subsystem names known to each source.
    my %inSeed = map { $_ => 1 } $fig->all_subsystems();
    my %inNmpdr = map { $_ => 1 } $sfx->all_subsystems();
    # Status table. The key combines whether the subsystem is found in the
    # NMPDR database with whether SEED marks it for NMPDR; the value is the
    # counter to bump and the message to report.
    my %statusTable = (
        'found/marked'     => ['nmpdrPure',
                               "Subsystem is marked NMPDR and is found in the NMPDR database."],
        'found/unmarked'   => ['nmpdrDeleting',
                               "Subsystem is not marked NMPDR and is found in the NMPDR database."],
        'missing/marked'   => ['nmpdrInserting',
                               "Subsystem is marked NMPDR and is not found in the NMPDR database."],
        'missing/unmarked' => ['nmpdrExcluded',
                               "Subsystem is not marked NMPDR and is not found in the NMPDR database."],
    );
    # Examine every SEED subsystem in name order.
    for my $subName (sort keys %inSeed) {
        Trace("Processing SEED subsystem $subName.") if T(3);
        my $isMarked = $fig->nmpdr_subsystem($subName);
        # Classify the subsystem and start its report with the status line.
        my $statusKey = join('/', ($inNmpdr{$subName} ? 'found' : 'missing'),
                                  ($isMarked ? 'marked' : 'unmarked'));
        my ($counter, $message) = @{$statusTable{$statusKey}};
        my @report = ($message);
        $retVal->Add($counter => 1);
        # Fetch the SEED subsystem object so the diagrams can be examined.
        Trace("Retrieving subsystem object for $subName.") if T(3);
        my $subsystem = $fig->get_subsystem($subName);
        if ($subsystem) {
            $retVal->Add(seedSubFound => 1);
            # Report the format of each diagram. The diagram ID is the first
            # element of each entry returned by get_diagrams.
            for my $diagram ($subsystem->get_diagrams()) {
                $retVal->Add(subDiagram => 1);
                my ($diagramID) = @$diagram;
                my ($format, $formatCounter) = ($subsystem->is_new_diagram($diagramID)
                                                ? ('new', 'subDiagramNew')
                                                : ('old', 'subDiagramOld'));
                $retVal->Add($formatCounter => 1);
                push @report, "Diagram $diagramID is $format format.";
            }
        } else {
            push @report, "ERROR: Subsystem object cannot be created.";
            $retVal->Add(seedSubError => 1);
        }
        # Emit the whole report for this subsystem as a single trace message.
        Trace(join("\n   ", "Results for $subName", @report)) if T(2);
    }
    # Anything in the NMPDR that SEED no longer knows about has been deleted.
    for my $subName (grep { ! $inSeed{$_} } sort keys %inNmpdr) {
        Trace("Subsystem $subName is in NMPDR but deleted from SEED.") if T(2);
        $retVal->Add(nmpdrDeleted => 1);
    }
    # Hand the statistics back to the caller.
    return $retVal;
}

=head3 CheckBBHs

    my $stats = CheckBBHs($fig, $sfx);

Loop through all of the Sprout genomes, listing their BBH count.

=over 4

=item fig

[[FigPm]] object for accessing the SEED data store.

=item sfx

[[SFXlatePm]] object for accessing the NMPDR database.

=item RETURN

Returns a statistics object with a summary of what happened.

=back

=cut

sub CheckBBHs {
    # Unpack the parameters. Only $sfx is used here; $fig is accepted so that
    # every check has the same calling sequence.
    my ($fig, $sfx) = @_;
    # Create the statistics object to return to the caller.
    my $retVal = Stats->new();
    # Map each display label ("name [id]") to its genome ID so the genomes
    # can be processed in name order.
    my %genomeFor = map { my $id = $_; ($sfx->genus_species($id) . " [$id]" => $id) }
                    $sfx->all_genomes();
    for my $label (sort keys %genomeFor) {
        my $genomeID = $genomeFor{$label};
        # Ask for the BBHs of every feature in this genome. The call is made
        # in scalar context and its result is treated as a count.
        # NOTE(review): FIGRules is not loaded by this file; presumably it is
        # pulled in by one of the modules used above -- confirm.
        my $bbhCount = FIGRules::BatchBBHs("fig|${genomeID}.%", 1e-10);
        if ($bbhCount) {
            Trace("$label BBH count is $bbhCount.") if T(3);
            $retVal->Add(bbhCount => $bbhCount);
        } else {
            # A genome without any BBHs is a problem worth flagging.
            Trace("$label has no BBHs. ***") if T(1);
            $retVal->Add(badGenomes => 1);
        }
        $retVal->Add(genomes => 1);
    }
    # Summarize how many genomes came up empty.
    if (T(2)) {
        my $badCount = $retVal->Ask('badGenomes');
        my $total = $retVal->Ask('genomes');
        Trace($badCount . " out of " . $total . " genomes had no BBHs.");
    }
    # Return the stats.
    return $retVal;
}

=head3 CheckSeedAttrs

    my $stats = CheckSeedAttrs($fig, $sfx);

Loop through all of the SEED genomes marked for the NMPDR,
listing their special attributes.

=over 4

=item fig

[[FigPm]] object for accessing the SEED data store.

=item sfx

[[SFXlatePm]] object for accessing the NMPDR database.

=item RETURN

Returns a statistics object with a summary of what happened.

=back

=cut

sub CheckSeedAttrs {
    # Unpack the parameters. Only $fig is used here; $sfx is accepted so that
    # every check has the same calling sequence.
    my ($fig, $sfx) = @_;
    # Attribute selectors, keyed by the counter name used in the statistics.
    # Each entry is the pair of arguments passed to get_attributes after the
    # feature pattern; undef leaves that argument unspecified.
    # NOTE(review): these look like a key pattern and a value pattern, with
    # "%" acting as a wildcard -- confirm against FIG::get_attributes.
    my %attrTable = (
        CDD       => ['CDD',      undef],
        PSORT     => ['PSORT',    undef],
        Phobius   => ['Phobius',  undef],
        IEDB      => ['iedb%',    undef],
        essential => [undef,      'essential'],
        virulent  => ['virulen%', undef],
    );
    # Overall statistics, returned to the caller.
    my $retVal = Stats->new();
    # Get the genome list from the SEED.
    my @genomes = $fig->genomes(1);
    Trace(scalar(@genomes) . " genomes found.") if T(2);
    for my $genome (@genomes) {
        # Per-genome statistics, with one counter per attribute type.
        my $genomeStats = Stats->new(keys %attrTable);
        for my $attr (keys %attrTable) {
            my ($selector1, $selector2) = @{$attrTable{$attr}}[0, 1];
            # Count the matching attributes across all of the genome's features.
            my $found = () = $fig->get_attributes("fig|$genome.%", $selector1, $selector2);
            $genomeStats->Add($attr => $found);
            # Each lookup counts as one query.
            $retVal->Add(queries => 1);
        }
        # Show the results for this genome, labelled with its name.
        my $name = $fig->genus_species($genome);
        Trace("Results for $name [$genome]: " . $genomeStats->Display()) if T(2);
        # Fold the genome's numbers into the overall statistics.
        $retVal->Accumulate($genomeStats);
        $retVal->Add(genomes => 1);
    }
    # Return the statistics.
    return $retVal;
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3