[Bio] / Sprout / NmpdrStats.pl Repository:
ViewVC logotype

Annotation of /Sprout/NmpdrStats.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     use strict;
21 :     use Tracer;
22 :     use SFXlate;
23 :     use Sprout;
24 :     use DBMaster;
25 :     use DBKernel;
26 :    
27 :     =head1 NmpdrStats Script
28 :    
29 :     NmpdrStats [options]
30 :    
31 :     Display the NMPDR statistics for the current month.
32 :    
33 :     =head2 Introduction
34 :    
35 :     This script analyzes the NMPDR databases and the job queues for RAST and MG-RAST
36 :     in order to compute the monthly values for the [[Main.CountsAndStatistics]] page.
37 :    
38 :     =head2 Command-Line Options
39 :    
40 :     =over 4
41 :    
42 :     =item trace
43 :    
44 :     Specifies the tracing level. The higher the tracing level, the more messages
45 :     will appear in the trace log. Use E to specify emergency tracing.
46 :    
47 :     =item user
48 :    
49 :     Name suffix to be used for log files. If omitted, the PID is used.
50 :    
51 :     =item sql
52 :    
53 :     If specified, turns on tracing of SQL activity.
54 :    
55 :     =item background
56 :    
57 :     Save the standard and error output to files. The files will be created
58 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
59 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
60 :     B<user> option above.
61 :    
62 :     =item help
63 :    
64 :     Display this command's parameters and options.
65 :    
66 :     =item warn
67 :    
68 :     Create an event in the RSS feed when an error occurs.
69 :    
70 :     =item phone
71 :    
72 :     Phone number to message when the script is complete.
73 :    
74 :     =item sprout
75 :    
76 :     Only display the sprout-related statistics. This is useful if you're
77 :     checking to determine if there is sufficient new data to justify an
78 :     NMPDR update.
79 :    
80 :     =back
81 :    
82 :     =cut
83 :    
# Script-specific command-line options. Each entry pairs an option name with a
# two-element list (apparently the default value and the help text -- confirm
# against Tracer::StandardSetup).
my %optionTable = (
    trace  => ["2", "tracing level"],
    sprout => ["", "if specified, only the Sprout-related counts will be shown"],
    phone  => ["", "phone number (international format) to call when load finishes"],
);
# Parse the command line into the option hash and the positional parameters.
my ($options, @parameters) = StandardSetup(['ERDB'], \%optionTable, "", @ARGV);
# Short description of how the script ended; filled in after the main work
# and used in the completion message.
my $rtype;
# Ensure we catch errors. Any failure inside this block is left in $@ so the
# code after it can report how the script ended.
eval {
    # Count the complete genomes in the SEED database on both the annotator
    # host and the local mirror, so the two counts can be compared in the
    # trace log.
    Trace("Computing SEED genome count.") if T(2);
    my %servers = (Annotator => $FIG_Config::anno_host, Mirror => 'localhost');
    for my $server (sort keys %servers) {
        my $dbh = DBKernel->new($FIG_Config::dbms, $FIG_Config::anno_db,
                                $FIG_Config::dbuser, $FIG_Config::dbpass,
                                $FIG_Config::dbport, $servers{$server},
                                $FIG_Config::dbsock);
        my $results = $dbh->SQL("SELECT COUNT(genome) FROM genome WHERE complete = 1");
        # Only dereference the result if it really is a non-empty list of
        # rows; anything else is reported as a failed query.
        my $seedCount;
        if (ref($results) eq 'ARRAY' && @{$results} && ref($results->[0]) eq 'ARRAY') {
            $seedCount = $results->[0]->[0];
        }
        if (! defined $seedCount) {
            Trace("Query failed for SEED genome count on $servers{$server}.") if T(1);
        } else {
            Trace("$server SEED genome count is $seedCount") if T(2);
        }
    }
    # Create the SFXlate object and pull the Sprout object out of it.
    my $sfx = SFXlate->new();
    my $sprout = $sfx->{sprout};
    # This table tells us how to get most of the counts we need. Each value
    # is the argument list passed to the Sprout GetCount method.
    my %countParams = (Genomes => ['Genome', '', []],
                       Features => ['Feature', '', []],
                       'DB XREFS' => ['IsAlsoFoundIn', '', []],
                       Subsystems => ['Subsystem', '', []],
                       'FIGfam coverage' =>
                            ['IsFamilyForFeature',
                             "IsFamilyForFeature(from-link) LIKE ?",
                             ['FIG%']]);
    # This list gives us the order in which we want to output the counts.
    # Counts that were never computed are skipped at output time.
    my @countList = ('Genomes', 'Features', 'Base Pairs', 'DB XREFS',
                     'FIGfam coverage', 'Subsystems', 'Registered Users',
                     'RAST Jobs', 'MG-RAST Jobs', 'Diagrams', 'Scenarios',
                     'FullyFunctionalSubsystems');
    # We'll put the actual counts in here, keyed by the names in @countList.
    my %counts;
    # First, get the Sprout data counts.
    Trace("Calculating Sprout counts.") if T(2);
    for my $count (keys %countParams) {
        my @parms = @{$countParams{$count}};
        $counts{$count} = $sprout->GetCount(@parms);
        Trace("$count is $counts{$count}.") if T(3);
    }
    # Now we compute the number of base pairs by summing the DNA size of
    # every genome.
    Trace("Computing DNA lengths.") if T(2);
    my $query = $sprout->Get('Genome', '', []);
    while (my $genome = $query->Fetch()) {
        $counts{'Base Pairs'} += $genome->PrimaryValue('dna-size');
    }
    # Next we analyze subsystems. We total the diagrams and scenarios, count
    # the subsystems that have at least one of each, and remember the one
    # with the largest combined total.
    my @ss = $sfx->all_subsystems();
    my ($bestSS, $bestSSTotal) = (undef, 0);
    for my $ss (@ss) {
        my $ssData = $sfx->get_subsystem($ss);
        my @dd = $ssData->get_diagrams;
        $counts{Diagrams} += scalar @dd;
        my @hopes = $ssData->get_hope_scenario_names();
        $counts{Scenarios} += scalar @hopes;
        if (scalar(@dd) > 0 && scalar(@hopes) > 0) {
            $counts{FullyFunctionalSubsystems}++;
            my $functionality = scalar(@dd) + scalar(@hopes);
            if ($functionality > $bestSSTotal) {
                $bestSS = $ss;
                $bestSSTotal = $functionality;
            }
        }
    }
    # The rest of this is skipped if the user specified the "sprout" option.
    if (! $options->{sprout}) {
        # Now, get the number of registered users. This requires polling the
        # WebAppBackend database.
        Trace("Calculating user count.") if T(2);
        my $backend = DBMaster->new(-database => $FIG_Config::webapplication_db,
                                    -host => $FIG_Config::webapplication_host,
                                    -user => $FIG_Config::webapplication_user);
        # We fetch the complete list of users and take its length; "scalar"
        # makes the count-by-context explicit.
        $counts{'Registered Users'} = scalar @{$backend->User->get_objects()};
        # Now we count the RAST and MG-RAST jobs. Each job is assigned a unique
        # sequential number that becomes its sub-directory name in the "jobs"
        # directory. To get the job count, we find the numerically largest
        # directory name.
        Trace("Calculating job counts.") if T(2);
        my %jobCounts = ('RAST Jobs' => $FIG_Config::rast_jobs,
                         'MG-RAST Jobs' => $FIG_Config::mgrast_jobs);
        for my $jobType (keys %jobCounts) {
            my $dir = $jobCounts{$jobType};
            Trace("Counting $jobType in $dir.") if T(3);
            my $best = 0;
            for my $jobID (OpenDir($dir, 1)) {
                # Ensure this is a valid job directory: an all-digit name
                # that contains a meta.xml file.
                if ($jobID =~ /^\d+$/ && -f "$dir/$jobID/meta.xml") {
                    # It is, so figure out if it's the best.
                    $best = $jobID if ($jobID > $best);
                }
            }
            $counts{$jobType} = $best;
        }
    }
    # Now we output the results to the trace log, in display order.
    for my $count (@countList) {
        my $countValue = $counts{$count};
        if (defined $countValue) {
            Trace("$count = " . Tracer::CommaFormat($countValue)) if T(2);
        }
    }
    # $bestSS stays undefined when no subsystem has both diagrams and
    # scenarios; say so explicitly rather than interpolating undef.
    if (defined $bestSS) {
        Trace("Best subsystem = $bestSS.") if T(2);
    } else {
        Trace("No subsystem has both diagrams and scenarios.") if T(2);
    }
};
# Record how the main block ended and trace the outcome.
if (! $@) {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
} else {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
}
# If a phone number was supplied, send a text message with the outcome and
# trace whether the send produced a message ID.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "NmpdrStats terminated with $rtype.");
    my $sendStatus = ($msgID ? "Phone message sent with ID $msgID."
                             : "Phone message not sent.");
    Trace($sendStatus) if T(2);
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3