[Bio] / Sprout / ShowCounts.pl Repository:
ViewVC logotype

Diff of /Sprout/ShowCounts.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.18, Thu Oct 9 17:22:26 2008 UTC revision 1.19, Mon Jan 19 21:57:23 2009 UTC
# Line 3  Line 3 
3  =head1 Generate NMPDR Web Page Includes  =head1 Generate NMPDR Web Page Includes
4    
5  This script generates the include files for the NMPDR cover pages. The include  This script generates the include files for the NMPDR cover pages. The include
6  files go in the C<html/includes> directory for the current NMPDR, and are  files are turned into Wiki pages. They need to be generated after the
 based on data in the Sprout database. They need to be generated after the  
7  Sprout database is loaded, but before the cover pages are put online.  Sprout database is loaded, but before the cover pages are put online.
8    
 The include files generated are:  
   
 =over 4  
   
 =item version.inc  
   
 Displays the version date, the genome counts, the number of subsystems,  
 and the number of new annotations. This file is included in the  
 C<index.php> document.  
   
 =item forms/all_genomes.inc  
   
 Displays a form allowing the user to select an organism and see a summary of  
 its subsystems and reactions. This file is included in the  
 C<content/subsystems.php> document.  
   
 =item sproutdb.inc  
   
 Displays the documentation for the Sprout database. This file is included in  
 the C<content/docs.php> document.  
   
 =back  
   
9  The currently-supported command-line options are as follows.  The currently-supported command-line options are as follows.
10    
11  =over 4  =over 4
# Line 60  Line 36 
36    
37  Display this command's parameters and options.  Display this command's parameters and options.
38    
39    =item noalias
40    
41    Do not recompute the alias counts.
42    
43  =back  =back
44    
45  =cut  =cut
# Line 70  Line 50 
50  use File::Copy;  use File::Copy;
51  use File::Path;  use File::Path;
52  use FIG;  use FIG;
53  use CGI qw(:standard);  use CGI qw(-nosticky);
54  use WikiTools;  use WikiTools;
55    
56  # Get the command-line options and parameters.  # Get the command-line options and parameters.
57  my ($options, @parameters) = StandardSetup([qw(Sprout ERDB SFXlate WikiTools) ],  my ($options, @parameters) = StandardSetup([qw(Sprout SFXlate WikiTools) ],
58                                             {                                             {
59                                              trace => [2, 'tracing level'],                                              trace => [3, 'tracing level'],
60                                                noalias => ['', 'if specified, alias counts will not be updated'],
61                                             },                                             },
62                                             "<version date>",                                             "",
63                                             @ARGV);                                             @ARGV);
64    
65  use SFXlate;  use SFXlate;
# Line 88  Line 69 
69  eval {  eval {
70      # Get the target directory.      # Get the target directory.
71      my $targetDir = $options->{target};      my $targetDir = $options->{target};
     # Verify the parameters.  
     if (! $parameters[0]) {  
         Confess("Please specify a version date.");  
     } else {  
72          Trace("Connecting to Sprout.") if T(2);          Trace("Connecting to Sprout.") if T(2);
73          # Get the Sprout object.          # Get the Sprout object.
74          my $sfx = SFXlate->new_sprout_only();          my $sfx = SFXlate->new_sprout_only();
         # Get the old Sprout.  
         my $oldSprout = SFXlate->old_sprout_only();  
75          # Create the counts page.          # Create the counts page.
76          my $page = CreateCounts($sfx, $oldSprout, $parameters[0]);      Trace("Creating counts page.") if T(2);
77        my $page = CreateCounts($sfx);
78          Trace("Page = \n$page") if T(4);          Trace("Page = \n$page") if T(4);
79          # Put it in the wiki.          # Put it in the wiki.
80          my $wiki = WikiTools->new();          my $wiki = WikiTools->new();
81          my $rc = $wiki->Save('NmpdrVersion', 'Main', 'WebHome', $page);      SaveWikiPage($wiki, $page, 'Main', 'NmpdrVersion', 'WebHome');
82          if ($rc) {      # Create the genbank accession number table.
83              Trace("Page creation complete.") if T(2);      Trace("Creating genbank page.") if T(2);
84          } else {      $page = CreateGenBank($sfx);
85              Confess("Page creation failed: " . $wiki->{error});      Trace("Page = \n$page") if T(4);
86          }      SaveWikiPage($wiki, $page, 'Main', 'GenBankLinks', 'WebHome');
87        # Create the essential genes search page.
88        Trace("Creating essential genes form.") if T(2);
89        $page = CreateEssentials($sfx);
90        SaveWikiPage($wiki, $page, 'Main', 'EssentialGenesForm', 'WebHome');
91        # Only proceed if the user wants alias counts.
92        if (! $options->{noalias}) {
93            Trace("Generating alias counts.") if T(2);
94            # Now comes the long, painful part. We loop through the
95            # IsAlsoFoundIn relationship counting external IDs by type.
96            my $q = $sfx->Get("IsAlsoFoundIn", "", []);
97            my %counts;
98            while (my $alias = $q->Fetch()) {
99                my $dbName = $alias->PrimaryValue('to-link');
100                $counts{$dbName}++;
101            }
102            # Generate a table of the counts.
103            my @table = ("| *ID Type* |  *Count* |");
104            for my $dbName (sort keys %counts) {
105                my $thisCount = Tracer::CommaFormat($counts{$dbName});
106                push @table, "| [[$dbName]] |  $thisCount |";
107            }
108            # Save this table.
109            $page = join("\n", @table);
110            SaveWikiPage($wiki, $page, 'Main', 'AliasCounts', 'WebHome');
111      }      }
112  };  };
113  if ($@) {  if ($@) {
# Line 142  Line 142 
142    
143  A Sprout object for accessing the previous database.  A Sprout object for accessing the previous database.
144    
 =item versionDate  
   
 The string to be used for the version date.  
   
145  =item fileName  =item fileName
146    
147  The name of the output file.  The name of the output file.
# Line 160  Line 156 
156    
157  sub CreateCounts {  sub CreateCounts {
158      # Get the parameters.      # Get the parameters.
159      my ($sprout, $oldSprout, $versionDate) = @_;      my ($sprout) = @_;
160      # Start the heading.      # Start the heading.
161      my $retVal = "<h3 class=\"home\">Version of $versionDate\n";      my $retVal = "<h3 class=\"home\">This edition of the NMPDR includes ";
162      # Get the counts.      # Get the counts.
163      my @counts = $sprout->GenomeCounts();      my @counts = $sprout->GenomeCounts();
164      my @names = qw(archaeal bacterial eukaryal viral environmental);      my @names = qw(archaeal bacterial eukaryal viral environmental);
# Line 174  Line 170 
170          }          }
171      }      }
172      # Form the named counts into English, which is a very complicated process.      # Form the named counts into English, which is a very complicated process.
173      my $output = "Includes $namedCounts[0]";      $retVal .= $namedCounts[0];
174      for (my $i = 1; $i < $#namedCounts; $i++) {      for (my $i = 1; $i < $#namedCounts; $i++) {
175          $output .= ", $namedCounts[$i]";          $retVal .= ", $namedCounts[$i]";
176      }      }
177      if ($#namedCounts > 0) {      if ($#namedCounts > 0) {
178          $output .= ", and $namedCounts[$#namedCounts]";          $retVal .= ", and $namedCounts[$#namedCounts]";
     }  
     $output .= " genomes.";  
     # Output the counts.  
     $retVal .= "<br />$output";  
     # Get the number of subsystems.  
     my $subsystems = $sprout->GetCount(['Subsystem'], "", []);  
     $retVal .= "<br />$subsystems active subsystems";  
     # Count the number of new annotations, genomes, and features.  
     my %things = ( Annotation => 'annotations',  
                    Feature => 'features',  
                    Genome => 'organisms'  
                  );  
     for my $thing (sort keys %things) {  
         my $newCount = $sprout->GetCount([$thing], "", []);  
         my $oldCount = $oldSprout->GetCount([$thing], "", []);  
         my $delta = $newCount - $oldCount;  
         # Only display them if there's a reasonable number.  
         if ($delta > 5) {  
             $retVal .= ", $delta&nbsp;new&nbsp;$things{$thing}";  
         }  
179      }      }
180        $retVal .= " %FIG{genomes}% with ";
181        # Now we need to count the number of features, subsystems, and FIGfams.
182        my $subsystems = Tracer::CommaFormat($sprout->GetCount(['Subsystem'], "", []));
183        my $figFams = Tracer::CommaFormat($sprout->GetCount(['IsFamilyForFeature'],
184                                        "IsFamilyForFeature(from-link) LIKE ?",
185                                        ['FIG%']));
186        my $fids = Tracer::CommaFormat($sprout->GetCount(['Feature'], "", []));
187        $retVal .= "$fids [[FIG.Feature][genetic features]], of which $figFams are in FIG.FigFams curated using $subsystems active %FIG{subsystems}%.";
188      # Terminate the heading.      # Terminate the heading.
189      $retVal .= ".</h3>\n";      $retVal .= "</h3>\n";
190      # Return it.      # Return it.
191      return $retVal;      return $retVal;
192  }  }
193    
=head3 CreateGenBank

    my $page = CreateGenBank($sprout);

Build the wiki text for the GenBank accession number page. The page is a
two-column table: each row pairs one accession number with a C<%SV{...}%>
entry naming the genome it belongs to (the original documentation describes
that entry as a link to the SeedViewer genome page).

Accession data is read from the C<GENBANK_ACC> attribute through a C<FIG>
object. Each attribute value is split on semicolons, and only pieces that
start with one or two capital letters followed by a digit are kept; the
intent, per the original comments, is to leave out shotgun numbers, which
use a four-letter prefix.

=over 4

=item sprout

Sprout object; used here to look up the genus/species name for each genome ID.

=item RETURN

Returns the wiki table as a single string, header row first, followed by one
row per accession number in sorted order.

=back

=cut

sub CreateGenBank {
    my ($sprout) = @_;
    # Accession number => wiki markup for the genome that owns it. If the
    # same number shows up more than once, the last one seen wins.
    my %genomeFor;
    # Pull every GENBANK_ACC attribute from the attribute database.
    my $fig = FIG->new();
    my @attributeRows = $fig->get_attributes(undef, "GENBANK_ACC");
    Trace(scalar(@attributeRows) . " genomes have genbank data.") if T(3);
    for my $attributeRow (@attributeRows) {
        # Each row is (genome ID, attribute key, accession number string);
        # the key is not needed here.
        my ($genomeID, undef, $accNumbers) = @$attributeRow;
        # Build the display markup once per genome.
        my $genomeName = $sprout->GenusSpecies($genomeID);
        my $genomeData = "%SV{\"$genomeName\" id=\"$genomeID\"}%";
        # Keep only the numbers with a one- or two-letter prefix; shotgun
        # sequences use a four-letter prefix and are skipped.
        my @wanted = grep { /^[A-Z]{1,2}\d+/ } split /\s*;\s*/, $accNumbers;
        $genomeFor{$_} = $genomeData for @wanted;
    }
    # Emit the header row followed by the data rows in sorted order.
    my @lines = ("| *ACCN* | *Genome* |");
    for my $accNumber (sort keys %genomeFor) {
        push @lines, "| $accNumber | $genomeFor{$accNumber} |";
    }
    return join("\n", @lines);
}
246    
247    
248    
=head3 SaveWikiPage

    SaveWikiPage($wiki, $page, $web, $name, $parent);

Store a page in the wiki through the supplied wiki object. When the save
reports failure, C<Confess> is called with a message that includes the wiki
object's C<error> field (the original documentation states that this throws
an exception).

=over 4

=item wiki

Wiki object whose C<Save> method is used to store the page.

=item page

Text of the page to store.

=item web

Name of the web that will hold the page.

=item name

Name of the page within the web.

=item parent

Name of the page's parent page.

=back

=cut

sub SaveWikiPage {
    my ($wiki, $page, $web, $name, $parent) = @_;
    Trace("Saving $name page:\n$page") if T(4);
    # Save takes its arguments in a different order from this function:
    # page name first, then web, parent, and finally the page text.
    my $saved = $wiki->Save($name, $web, $parent, $page);
    if (! $saved) {
        Confess("$web.$name page creation failed: " . $wiki->{error});
    } else {
        Trace("$web.$name page creation complete.") if T(2);
    }
}
294    
=head3 CreateEssentials

    my $page = CreateEssentials($sprout);

Build the wiki markup for the essential genes search form. The form holds a
genome dropdown that is filtered down to the genomes returned by a search
for features with an essential-gene value, plus hidden fields that preset
the search class and keyword.

=over 4

=item sprout

Sprout object used to run the search and to generate the genome dropdown.

=item RETURN

Returns the form as wiki markup, wrapped in C<noautolink> tags.

=back

=cut

sub CreateEssentials {
    my ($sprout) = @_;
    # Collect the IDs of all genomes that have at least one feature with
    # a non-null essential value.
    my %genomes;
    my $qh = $sprout->Search("essential", "Feature", "Feature IsInGenome Genome",
                             'Feature(essential) IS NOT NULL', []);
    while (my $feature = $qh->Fetch()) {
        my ($genomeID, $name) = $feature->Values('Genome(id) Genome(scientific-name)');
        # Many features share a genome; record and trace each genome once.
        next if $genomes{$genomeID};
        $genomes{$genomeID} = 1;
        Trace("Genome $name selected.") if T(3);
    }
    # The hidden fields travel as a single comma-separated attribute value.
    my $hiddenList = join(", ", qw(Class=GeneSearch keywords=essential Search=Go));
    # Generate the genome dropdown, restricted to the genomes found above.
    my $dropdown = $sprout->GenomeMenu(name => 'genome', id => 'EssentialGenomeControl',
                                       filter => \%genomes);
    # The dropdown sits inside a wiki table row, so it must be a single line.
    $dropdown =~ s/\n/ /g;
    # Assemble the page: autolinking off, form start, dropdown row, form end.
    my @pageLines = (
        "<noautolink>",
        qq(%STARTFORM{"NmpdrPlugin/search" hidden="$hiddenList"}%),
        "| *Genome* | $dropdown |",
        qq(|  %ENDFORM{"Find Essential"}%  ||),
        "</noautolink>",
    );
    return join("\n", @pageLines);
}
350    
351  1;  1;

Legend:
Removed from v.1.18  
changed lines
  Added in v.1.19

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3