[Bio] / Sprout / ShowCounts.pl Repository:
ViewVC logotype

Annotation of /Sprout/ShowCounts.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.20 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 Generate NMPDR Web Page Includes
4 :    
5 :     This script generates the include files for the NMPDR cover pages. The include
6 : parrello 1.19 files are turned into Wiki pages. They need to be generated after the
7 : parrello 1.1 Sprout database is loaded, but before the cover pages are put online.
8 :    
9 :     The currently-supported command-line options are as follows.
10 :    
11 :     =over 4
12 :    
13 :     =item user
14 :    
15 :     Name suffix to be used for log files. If omitted, the PID is used.
16 :    
17 :     =item trace
18 :    
19 :     Numeric trace level. A higher trace level causes more messages to appear. The
20 :     default trace level is 3. Tracing will be directed to the standard output
21 :     as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
22 :     where I<User> is the value of the B<user> option above.
23 :    
24 :     =item sql
25 :    
26 :     If specified, turns on tracing of SQL activity.
27 :    
28 :     =item background
29 :    
30 :     Save the standard and error output to files. The files will be created
31 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
32 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
33 :     B<user> option above.
34 :    
35 :     =item h
36 :    
37 :     Display this command's parameters and options.
38 :    
39 : parrello 1.19 =item noalias
40 :    
41 :     Do not recompute the alias counts.
42 :    
43 : parrello 1.1 =back
44 :    
45 :     =cut
46 :    
47 :     use strict;
48 :     use Tracer;
49 :     use Cwd;
50 :     use File::Copy;
51 :     use File::Path;
52 :     use FIG;
53 : parrello 1.19 use CGI qw(-nosticky);
54 : parrello 1.17 use WikiTools;
55 : parrello 1.1
# Get the command-line options and parameters. Each entry in the option hash
# is a [default value, description] pair.
my ($options, @parameters) = StandardSetup([qw(Sprout SFXlate WikiTools) ],
                                           {
                                              trace => [3, 'tracing level'],
                                              noalias => ['', 'if specified, alias counts will not be updated'],
                                           },
                                           "",
                                           @ARGV);

use SFXlate;

# The return type (error/no error) will be put in here.
my $rtype;
# Run the whole job inside an eval so that a failure in any step is reported
# instead of killing the script. The block ends with a true value so that
# success is detected from the eval's return value rather than from $@ alone.
my $ok = eval {
    Trace("Connecting to Sprout.") if T(2);
    # Get the Sprout object.
    my $sfx = SFXlate->new_sprout_only();
    # Create the counts page.
    Trace("Creating counts page.") if T(2);
    my $page = CreateCounts($sfx);
    Trace("Page = \n$page") if T(4);
    # Put it in the wiki.
    my $wiki = WikiTools->new();
    SaveWikiPage($wiki, $page, 'Main', 'NmpdrVersion', 'WebHome');
    # Create the genbank accession number table.
    Trace("Creating genbank page.") if T(2);
    $page = CreateGenBank($sfx);
    Trace("Page = \n$page") if T(4);
    SaveWikiPage($wiki, $page, 'Main', 'GenBankLinks', 'WebHome');
    # Create the essential genes search page.
    Trace("Creating essential genes form.") if T(2);
    $page = CreateEssentials($sfx);
    SaveWikiPage($wiki, $page, 'Main', 'EssentialGenesForm', 'WebHome');
    # Only proceed if the user wants alias counts.
    if (! $options->{noalias}) {
        Trace("Generating alias counts.") if T(2);
        # Now comes the long, painful part. We loop through the
        # IsAlsoFoundIn relationship counting external IDs by type.
        my $q = $sfx->Get("IsAlsoFoundIn", "", []);
        my %counts;
        while (my $alias = $q->Fetch()) {
            my $dbName = $alias->PrimaryValue('to-link');
            $counts{$dbName}++;
        }
        # Generate a table of the counts, one wiki table row per ID type.
        my @table = ("| *ID Type* | *Count* |");
        for my $dbName (sort keys %counts) {
            my $thisCount = Tracer::CommaFormat($counts{$dbName});
            push @table, "| [[$dbName]] | $thisCount |";
        }
        # Save this table.
        $page = join("\n", @table);
        SaveWikiPage($wiki, $page, 'Main', 'AliasCounts', 'WebHome');
    }
    # Signal success to the enclosing eval.
    1;
};
# Capture the error text immediately: $@ is a global, and the tracing calls
# below could overwrite it before it is interpolated into the message.
my $error = $@;
if (! $ok) {
    $error ||= "unknown error";
    Trace("Counts failed with error: $error") if T(0);
    $rtype = "error";
} else {
    Trace("Counts complete.") if T(2);
    $rtype = "no error";
}
# Optionally report the outcome by text message.
# NOTE(review): "phone" is not declared in the option hash above; presumably
# it is one of the standard options added by StandardSetup -- confirm.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "ShowCounts terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}
128 : parrello 1.1
=head3 CreateCounts

    my $content = CreateCounts($sprout);

Create the count and version display for the front page of the NMPDR.

=over 4

=item sprout

A Sprout object for accessing the database. It must support the
C<GenomeCounts> and C<GetCount> methods.

=item RETURN

Returns a string that can be published for inclusion in the front page of the wiki.
The string is a single C<h3> heading followed by a new-line.

=back

=cut

sub CreateCounts {
    # Get the parameters.
    my ($sprout) = @_;
    # Get the counts. Each count is paired by position with a category name
    # below (presumably GenomeCounts returns them in this order -- confirm).
    my @counts = $sprout->GenomeCounts();
    my @names = qw(archaeal bacterial eukaryal viral environmental);
    # Get a list of named counts for all the categories with nonzero values.
    # A missing count is treated the same as a zero count.
    my @namedCounts = map { "$counts[$_] $names[$_]" }
                      grep { $counts[$_] } 0 .. $#names;
    # Form the named counts into English.
    my $countList;
    if (! @namedCounts) {
        # No category has any genomes. Say so explicitly rather than
        # appending an undefined value to the heading.
        $countList = "no";
    } elsif (@namedCounts <= 2) {
        # One or two items: no comma is needed ("A" or "A and B").
        $countList = join(" and ", @namedCounts);
    } else {
        # Three or more items: comma-separated, with "and" before the last.
        my $lastCount = pop @namedCounts;
        $countList = join(", ", @namedCounts) . ", and $lastCount";
    }
    # Start the heading.
    my $retVal = "<h3>This edition of the NMPDR includes $countList";
    $retVal .= " %FIG{genomes}% with ";
    # Now we need to count the number of features, subsystems, and FIGfams.
    my $subsystems = Tracer::CommaFormat($sprout->GetCount(['Subsystem'], "", []));
    my $figFams = Tracer::CommaFormat($sprout->GetCount(['IsFamilyForFeature'],
                                                        "IsFamilyForFeature(from-link) LIKE ?",
                                                        ['FIG%']));
    my $fids = Tracer::CommaFormat($sprout->GetCount(['Feature'], "", []));
    $retVal .= "$fids [[FIG.Feature][genetic features]], of which $figFams are in FIG.FigFams curated using $subsystems active %FIG{subsystems}%.";
    # Terminate the heading.
    $retVal .= "</h3>\n";
    # Return it.
    return $retVal;
}
193 :    
=head3 CreateGenBank

    my $page = CreateGenBank($sprout);

Build the GenBank accession number page: a wiki table with one row per
accepted accession number, each paired with a SeedViewer link markup for
the genome it belongs to.

=over 4

=item sprout

Sprout object; used here to look up each genome's name.

=item RETURN

Returns the wiki markup for a two-column table (accession number, genome),
sorted by accession number, with a heading row.

=back

=cut

sub CreateGenBank {
    my ($sprout) = @_;
    # Maps each accepted accession number to its formatted genome link.
    my %linkFor;
    # Pull every GENBANK_ACC attribute from the attribute database.
    my $fig = FIG->new();
    my @attrRows = $fig->get_attributes(undef, "GENBANK_ACC");
    Trace(scalar(@attrRows) . " genomes have genbank data.") if T(3);
    foreach my $attrRow (@attrRows) {
        # Each row is (object ID, key, value); the key is not needed here.
        my ($genomeID, undef, $accList) = @{$attrRow};
        # Build the display markup for this genome.
        my $genomeName = $sprout->GenusSpecies($genomeID);
        my $genomeLink = "%SV{\"$genomeName\" id=\"$genomeID\"}%";
        # The value is a semicolon-delimited list. Keep only the numbers that
        # start with one or two capital letters followed by a digit; shotgun
        # sequences use a four-letter prefix and are filtered out by this.
        my @accepted = grep { /^[A-Z]{1,2}\d+/ } split /\s*;\s*/, $accList;
        # A number seen more than once keeps the link from its last row.
        @linkFor{@accepted} = ($genomeLink) x @accepted;
    }
    # Emit the heading row followed by one row per accession number.
    my @lines = ("| *ACCN* | *Genome* |");
    foreach my $accNumber (sort keys %linkFor) {
        push @lines, "| $accNumber | $linkFor{$accNumber} |";
    }
    return join("\n", @lines);
}
246 :    
247 :    
248 :    
=head3 SaveWikiPage

    SaveWikiPage($wiki, $page, $web, $name, $parent);

Store a page in the wiki. A failed save is reported through C<Confess>,
with the wiki object's C<error> field appended to the message.

=over 4

=item wiki

Wiki object whose C<Save> method performs the store.

=item page

Text of the page to store.

=item web

Name of the web that will hold the page.

=item name

Name of the page within the web.

=item parent

Name of the page's parent page.

=back

=cut

sub SaveWikiPage {
    my ($wiki, $page, $web, $name, $parent) = @_;
    Trace("Saving $name page:\n$page") if T(4);
    # Note the argument order expected by Save: name, web, parent, text.
    my $saved = $wiki->Save($name, $web, $parent, $page);
    if (! $saved) {
        # A false return means the save failed; the reason is in the wiki object.
        Confess("$web.$name page creation failed: " . $wiki->{error});
    } else {
        Trace("$web.$name page creation complete.") if T(2);
    }
}
294 :    
=head3 CreateEssentials

    my $page = CreateEssentials($sprout);

Build the wiki form used to search for essential genes. The genome dropdown
is filtered to the genomes found by a search for features that have an
essential-gene value.

=over 4

=item sprout

Sprout object; used for the feature search and to build the genome menu.

=item RETURN

Returns the form as wiki markup, wrapped in a C<noautolink> section.

=back

=cut

sub CreateEssentials {
    my ($sprout) = @_;
    # Collect the ID of every genome that owns at least one essential feature.
    my %genomes;
    my $qh = $sprout->Search("essential", "Feature", "Feature IsInGenome Genome",
                             'Feature(essential) IS NOT NULL', []);
    while (my $feature = $qh->Fetch()) {
        my ($genomeID, $name) = $feature->Values('Genome(id) Genome(scientific-name)');
        # Record and trace each genome only the first time it is seen.
        next if $genomes{$genomeID};
        $genomes{$genomeID} = 1;
        Trace("Genome $name selected.") if T(3);
    }
    # Hidden fields are passed to the form as a comma-delimited list.
    my $hiddenList = join(", ", qw(Class=GeneSearch keywords=essential Search=Go));
    # Ask for a genome dropdown restricted to the genomes found above.
    my $dropdown = $sprout->GenomeMenu(name => 'genome', id => 'EssentialGenomeControl',
                                       filter => \%genomes);
    # The dropdown goes inside a wiki table row, so it must be on one line.
    $dropdown =~ s/\n/ /g;
    # Assemble the page: autolink off, form start, dropdown row, form end.
    return join("\n",
                "<noautolink>",
                qq(%STARTFORM{"NmpdrPlugin/search" hidden="$hiddenList"}%),
                "| *Genome* | $dropdown |",
                qq(| %ENDFORM{"Find Essential"}% ||),
                "</noautolink>");
}
350 : parrello 1.1
351 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3