[Bio] / Sprout / NmpdrStats.pl Repository:
ViewVC logotype

View of /Sprout/NmpdrStats.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (as text) (annotate)
Tue Jun 16 16:32:21 2009 UTC (9 years, 10 months ago) by parrello
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_10262011, HEAD
Changes since 1.3: +2 -1 lines
Fixed to use the correct socket for the mirror database check.

#!/usr/bin/perl -w

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

use strict;
use Tracer;
use SFXlate;
use Sprout;
use DBMaster;
use DBKernel;

=head1 NmpdrStats Script

    NmpdrStats [options] 

Display the NMPDR statistics for the current month.

=head2 Introduction

This script analyzes the NMPDR databases and the job queues for RAST and MG-RAST
in order to compute the monthly values for the [[Main.CountsAndStatistics]] page.

=head2 Command-Line Options

=over 4

=item trace

Specifies the tracing level. The higher the tracing level, the more messages
will appear in the trace log. Use E to specify emergency tracing.

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item help

Display this command's parameters and options.

=item warn

Create an event in the RSS feed when an error occurs.

=item phone

Phone number to message when the script is complete.

=item sprout

Only display the sprout-related statistics. This is useful if you're
checking to determine if there is sufficient new data to justify an
NMPDR update.

=back

=cut

# Get the command-line options and parameters. The arguments to StandardSetup
# are the list of tracing categories, the table of script-specific options
# (each entry is [default value, help text]), the help text for the positional
# parameters (this script takes none), and the raw command line.
my ($options, @parameters) = StandardSetup([qw(ERDB) ],
                                           {
                                              trace => ["2", "tracing level"],
                                              sprout => ["", "if specified, only the Sprout-related counts will be shown"],
                                              # This script does not load anything; the number is
                                              # messaged when the statistics run is over.
                                              phone => ["", "phone number (international format) to message when the script finishes"]
                                           },
                                           "",
                                           @ARGV);
# Outcome label for the run. It is assigned once the eval block below has
# finished and is included in the completion SMS.
my $rtype;
# Ensure we catch errors: everything that can fail runs inside this eval so
# that a fatal error is traced instead of killing the script outright.
eval {
    # Get access to the SEED mirror database. We want to compare the genome
    # counts on the annotator and the mirror to see if they've changed.
    Trace("Computing SEED genome count.") if T(2);
    my %servers = (Annotator => $FIG_Config::anno_host, Mirror => 'localhost');
    for my $server (sort keys %servers) {
        # Both servers are queried with the same credentials; only the host
        # differs.
        my $dbh = DBKernel->new($FIG_Config::dbms, $FIG_Config::anno_db,
                                $FIG_Config::dbuser, $FIG_Config::dbpass,
                                $FIG_Config::dbport, $servers{$server},
                                $FIG_Config::dbsock);
        my $results = $dbh->SQL("SELECT COUNT(genome) FROM genome WHERE complete = 1");
        if (! $results) {
            Trace("Query failed for SEED genome count on $servers{$server}.") if T(1);
        } else {
            # The count is the only column of the only row.
            Trace("$server SEED genome count is $results->[0]->[0]") if T(2);
        }
    }
    # Create the SPROUT object.
    my $sfx = SFXlate->new();
    my $sprout = $sfx->{sprout};
    # This table tells us how to get most of the counts we need. Each value is
    # the argument list handed to GetCount for that statistic.
    my %countParams = (Genomes =>    ['Genome', '', []],
                       Features =>   ['Feature', '', []],
                       'DB XREFS' => ['IsAlsoFoundIn', '', []],
                       Subsystems => ['Subsystem', '', []],
                       'FIGfam coverage' =>
                                     ['IsFamilyForFeature',
                                      "IsFamilyForFeature(from-link) LIKE ?",
                                      ['FIG%']]);
    # This list gives us the order in which we want to spit out the counts.
    # Statistics that were never computed (for example, because the "sprout"
    # option was specified) are simply skipped at output time.
    my @countList = ('Genomes', 'Features', 'Base Pairs', 'DB XREFS',
                     'FIGfam coverage', 'Subsystems', 'Registered Users',
                     'RAST Jobs', 'MG-RAST Jobs', 'Diagrams', 'Scenarios',
                     'FullyFunctionalSubsystems');
    # We'll put the actual counts in here.
    my %counts;
    # First, get the Sprout data counts.
    Trace("Calculating Sprout counts.") if T(2);
    for my $count (keys %countParams) {
        my @parms = @{$countParams{$count}};
        $counts{$count} = $sprout->GetCount(@parms);
        Trace("$count is $counts{$count}.") if T(3);
    }
    # Now we compute the number of base pairs by summing the DNA size of
    # every genome.
    Trace("Computing DNA lengths.") if T(2);
    my $query = $sprout->Get('Genome', '', []);
    while (my $genome = $query->Fetch()) {
        $counts{'Base Pairs'} += $genome->PrimaryValue('dna-size');
    }
    # Next we analyze subsystems.
    my @ss = $sfx->all_subsystems();
    # The "best" subsystem is the one with the most diagrams plus scenarios
    # among those that have at least one of each. It stays undefined if no
    # subsystem qualifies.
    my ($bestSS, $bestSSTotal) = (undef, 0);
    # Total up the diagrams and scenarios across all subsystems, and count
    # the subsystems that have both.
    for my $ss (@ss) {
        my $ssData = $sfx->get_subsystem($ss);
        my @dd = $ssData->get_diagrams;
        $counts{Diagrams} += scalar @dd;
        my @hopes = $ssData->get_hope_scenario_names();
        $counts{Scenarios} += scalar @hopes;
        if (scalar(@dd) > 0 && scalar(@hopes) > 0) {
            $counts{FullyFunctionalSubsystems}++;
            my $functionality = scalar(@dd) + scalar(@hopes);
            if ($functionality > $bestSSTotal) {
                $bestSS = $ss;
                $bestSSTotal = $functionality;
            }
        }
    }
    # The rest of this is skipped if the user specified the "sprout" option.
    if (! $options->{sprout}) {
        # Now, get the number of registered users. This requires polling the
        # WebAppBackend database.
        Trace("Calculating user count.") if T(2);
        my $backend = DBMaster->new(-database => $FIG_Config::webapplication_db,
                                    -host => $FIG_Config::webapplication_host,
                                    -user => $FIG_Config::webapplication_user);
        # This is a bit of a trick. We dereference the complete list of users
        # in a scalar context, which yields the user count.
        $counts{'Registered Users'} = scalar @{$backend->User->get_objects()};
        # Now we count the RAST and MG-RAST jobs. Each job is assigned a unique
        # sequential number that becomes its sub-directory name in the "jobs"
        # directory. To get the job count, we find the numerically largest
        # directory name.
        Trace("Calculating job counts.") if T(2);
        my %jobCounts = ('RAST Jobs'    => $FIG_Config::rast_jobs,
                         'MG-RAST Jobs' => $FIG_Config::mgrast_jobs);
        for my $jobType (keys %jobCounts) {
            my $dir = $jobCounts{$jobType};
            Trace("Counting $jobType in $dir.") if T(3);
            my $best = 0;
            for my $jobID (OpenDir($dir, 1)) {
                # Ensure this is a valid job directory: an all-numeric name
                # containing a meta.xml file.
                if ($jobID =~ /^\d+$/ && -f "$dir/$jobID/meta.xml") {
                    # It is, so figure out if it's the best.
                    $best = $jobID if ($jobID > $best);
                }
            }
            $counts{$jobType} = $best;
        }
    }
    # Now we print the results in the prescribed order, skipping any count
    # that was not computed.
    for my $count (@countList) {
        my $countValue = $counts{$count};
        if (defined $countValue) {
            Trace("$count = " . Tracer::CommaFormat($countValue)) if T(2);
        }
    }
    # Only name a best subsystem if one was actually found; interpolating an
    # undefined $bestSS would produce a warning and a meaningless message.
    if (defined $bestSS) {
        Trace("Best subsystem = $bestSS.") if T(2);
    } else {
        Trace("No subsystem has both diagrams and scenarios.") if T(2);
    }
};
# Turn the outcome of the eval block into a trace message and a result label.
if (! $@) {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
} else {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
}
# If a phone number was supplied, text the result to it and trace whether
# the message went out.
my $phone = $options->{phone};
if ($phone) {
    my $msgID = Tracer::SendSMS($phone, "NmpdrStats terminated with $rtype.");
    Trace($msgID ? "Phone message sent with ID $msgID."
                 : "Phone message not sent.") if T(2);
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3