[Bio] / Sprout / ShowCounts.pl Repository:
ViewVC logotype

View of /Sprout/ShowCounts.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.20 - (download) (as text) (annotate)
Mon Mar 2 22:33:31 2009 UTC (10 years, 6 months ago) by parrello
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, rast_rel_2009_05_18, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, rast_rel_2009_03_26, mgrast_dev_10262011, HEAD
Changes since 1.19: +1 -1 lines
Changed heading style.

#!/usr/bin/perl -w

=head1 Generate NMPDR Web Page Includes

This script generates the include files for the NMPDR cover pages. The include
files are turned into Wiki pages. They need to be generated after the
Sprout database is loaded, but before the cover pages are put online.

The currently-supported command-line options are as follows.

=over 4

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 3. Tracing will be written directly to the standard output
as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=item noalias

Do not recompute the alias counts.

=back

=cut

use strict;
use Tracer;
use Cwd;
use File::Copy;
use File::Path;
use FIG;
use CGI qw(-nosticky);
use WikiTools;

# Parse the command line. The first list is handed to StandardSetup as-is
# (presumably the tracing categories to activate -- confirm against Tracer).
# Each script-specific option is given as a [default value, description] pair.
my @setupCategories = qw(Sprout SFXlate WikiTools);
my %scriptOptions = (
    trace   => [3, 'tracing level'],
    noalias => ['', 'if specified, alias counts will not be updated'],
);
# No positional parameters are described, hence the empty usage string.
my ($options, @parameters) = StandardSetup(\@setupCategories, \%scriptOptions,
                                           "", @ARGV);

use SFXlate;

# Outcome of the run ("error" or "no error"). It is reported in the optional
# phone message sent at the end of the script.
my $rtype;
eval {
    Trace("Connecting to Sprout.") if T(2);
    # Get the Sprout object.
    my $sfx = SFXlate->new_sprout_only();
    # Create the counts page.
    Trace("Creating counts page.") if T(2);
    my $page = CreateCounts($sfx);
    Trace("Page = \n$page") if T(4);
    # Put it in the wiki.
    my $wiki = WikiTools->new();
    SaveWikiPage($wiki, $page, 'Main', 'NmpdrVersion', 'WebHome');
    # Create the genbank accession number table.
    Trace("Creating genbank page.") if T(2);
    $page = CreateGenBank($sfx);
    Trace("Page = \n$page") if T(4);
    SaveWikiPage($wiki, $page, 'Main', 'GenBankLinks', 'WebHome');
    # Create the essential genes search page.
    Trace("Creating essential genes form.") if T(2);
    $page = CreateEssentials($sfx);
    SaveWikiPage($wiki, $page, 'Main', 'EssentialGenesForm', 'WebHome');
    # Only proceed if the user wants alias counts.
    if (! $options->{noalias}) {
        Trace("Generating alias counts.") if T(2);
        # Now comes the long, painful part. We loop through the
        # IsAlsoFoundIn relationship counting external IDs by type.
        my $q = $sfx->Get("IsAlsoFoundIn", "", []);
        my %counts;
        while (my $alias = $q->Fetch()) {
            # The relationship's to-link value is used as the ID type name.
            my $dbName = $alias->PrimaryValue('to-link');
            $counts{$dbName}++;
        }
        # Generate a table of the counts, one wiki table row per ID type.
        my @table = ("| *ID Type* |  *Count* |");
        for my $dbName (sort keys %counts) {
            my $thisCount = Tracer::CommaFormat($counts{$dbName});
            push @table, "| [[$dbName]] |  $thisCount |";
        }
        # Save this table.
        $page = join("\n", @table);
        SaveWikiPage($wiki, $page, 'Main', 'AliasCounts', 'WebHome');
    }
};
# Capture the error immediately. $@ is a global, and T(0) below runs before
# the message string is built, so any eval executed inside the tracing code
# could otherwise wipe out the error text before it is reported.
my $evalError = $@;
if ($evalError) {
    Trace("Counts failed with error: $evalError") if T(0);
    $rtype = "error";
} else {
    Trace("Counts complete.") if T(2);
    $rtype = "no error";
}
# If a phone number was supplied, text the outcome to it.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "ShowCounts terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}

=head3 CreateCounts

    my $content = CreateCounts($sprout);

Create the count display for the front page of the NMPDR. The result is a
single heading that lists the number of genomes in each category with a
nonzero count, followed by the feature, FIGfam, and subsystem counts.

=over 4

=item sprout

A Sprout object for accessing the database.

=item RETURN

Returns a string that can be published for inclusion in the front page of the wiki.

=back

=cut

sub CreateCounts {
    # Get the parameters.
    my ($sprout) = @_;
    # Start the heading.
    my $retVal = "<h3>This edition of the NMPDR includes ";
    # Get the counts. The names below label them positionally.
    my @counts = $sprout->GenomeCounts();
    my @names = qw(archaeal bacterial eukaryal viral environmental);
    # Build a "<count> <category>" phrase for each category with a nonzero value.
    my @namedCounts = map { "$counts[$_] $names[$_]" }
                      grep { $counts[$_] } 0 .. $#names;
    # Form the phrases into an English list.
    my $genomeList;
    if (! @namedCounts) {
        # Nothing to list: avoid concatenating an undefined value.
        $genomeList = "no";
    } elsif (@namedCounts <= 2) {
        # One or two phrases need no commas ("A" or "A and B").
        $genomeList = join(" and ", @namedCounts);
    } else {
        # Three or more: comma-separated, with "and" before the last one.
        my @leading = @namedCounts[0 .. $#namedCounts - 1];
        $genomeList = join(", ", @leading) . ", and $namedCounts[-1]";
    }
    $retVal .= "$genomeList %FIG{genomes}% with ";
    # Now we need to count the number of features, subsystems, and FIGfams.
    my $subsystems = Tracer::CommaFormat($sprout->GetCount(['Subsystem'], "", []));
    # Only family links whose from-link starts with "FIG" are counted.
    my $figFams = Tracer::CommaFormat($sprout->GetCount(['IsFamilyForFeature'],
                                    "IsFamilyForFeature(from-link) LIKE ?",
                                    ['FIG%']));
    my $fids = Tracer::CommaFormat($sprout->GetCount(['Feature'], "", []));
    $retVal .= "$fids [[FIG.Feature][genetic features]], of which $figFams are in FIG.FigFams curated using $subsystems active %FIG{subsystems}%.";
    # Terminate the heading.
    $retVal .= "</h3>\n";
    # Return it.
    return $retVal;
}

=head3 CreateGenBank

    my $page = CreateGenBank($sprout);

Create the GenBank accession number page. This consists of a table of
genomes linked to the SeedViewer genome pages for all genomes with known
accession numbers.

=over 4

=item sprout

Sprout object for accessing the database.

=item RETURN

Returns a Wiki page containing a table of accession numbers and genomes.

=back

=cut

sub CreateGenBank {
    my ($sprout) = @_;
    # Accession number => wiki markup for the genome it belongs to.
    my %genomeFor;
    # Pull every GENBANK_ACC attribute row from the FIG attribute data.
    my $fig = FIG->new();
    my @attrRows = $fig->get_attributes(undef, "GENBANK_ACC");
    Trace(scalar(@attrRows) . " genomes have genbank data.") if T(3);
    foreach my $row (@attrRows) {
        # Each row is unpacked as (genome ID, attribute key, accession list);
        # the key is not needed here.
        my ($genomeID, undef, $accList) = @$row;
        # Build the display markup for this genome.
        my $genomeName = $sprout->GenusSpecies($genomeID);
        my $markup = qq(%SV{"$genomeName" id="$genomeID"}%);
        # The list is semicolon-delimited. Keep only numbers that begin with
        # one or two capital letters followed by digits; this rejects the
        # four-letter prefixes used by shotgun sequences.
        my @wanted = grep { /^[A-Z]{1,2}\d+/ } split(/\s*;\s*/, $accList);
        @genomeFor{@wanted} = ($markup) x @wanted;
    }
    # Emit the header row followed by one row per accession number, sorted.
    my @lines = ("| *ACCN* | *Genome* |");
    foreach my $accNumber (sort keys %genomeFor) {
        push @lines, "| $accNumber | $genomeFor{$accNumber} |";
    }
    return join("\n", @lines);
}



=head3 SaveWikiPage

    SaveWikiPage($wiki, $page, $web, $name, $parent);

Save the specified page in the wiki. If an error occurs, an exception
will be thrown.

=over 4

=item wiki

Wiki object to be used to save the page.

=item page

Actual page text to store.

=item web

Name of the web in which to store the page.

=item name

Name to give to the page.

=item parent

Name of the parent page.

=back

=cut

sub SaveWikiPage {
    my ($wiki, $page, $web, $name, $parent) = @_;
    Trace("Saving $name page:\n$page") if T(4);
    # Note that Save takes the page name before the web name, which is the
    # reverse of this subroutine's own parameter order.
    if (! $wiki->Save($name, $web, $parent, $page)) {
        # A false result means failure; report the wiki object's error text.
        Confess("$web.$name page creation failed: " . $wiki->{error});
    } else {
        Trace("$web.$name page creation complete.") if T(2);
    }
}

=head3 CreateEssentials

    my $page = CreateEssentials($sprout);

Create the form for doing an essential genes search. It only includes genomes
with essential genes in them.

=over 4

=item sprout

Sprout object used to access the database.

=item RETURN

Returns the forms as wiki markup.

=back

=cut

sub CreateEssentials {
    my ($sprout) = @_;
    # Collect the IDs of the genomes returned by the essential-feature search.
    my %eligible;
    my $query = $sprout->Search("essential", "Feature", "Feature IsInGenome Genome",
                                'Feature(essential) IS NOT NULL', []);
    while (my $hit = $query->Fetch()) {
        my ($genomeID, $name) = $hit->Values('Genome(id) Genome(scientific-name)');
        # Each genome is recorded (and traced) only the first time it shows up.
        next if $eligible{$genomeID};
        $eligible{$genomeID} = 1;
        Trace("Genome $name selected.") if T(3);
    }
    # These name=value pairs are passed along as the form's hidden fields.
    my $hiddenList = join(", ", "Class=GeneSearch", "keywords=essential", "Search=Go");
    # Build the genome dropdown, restricted to the genomes found above.
    my $dropdown = $sprout->GenomeMenu(name => 'genome', id => 'EssentialGenomeControl',
                                       filter => \%eligible);
    # The dropdown goes inside a wiki table row, so flatten it onto one line.
    $dropdown =~ s{\n}{ }g;
    # Assemble the page: the form sits inside a noautolink section.
    my @lines = (
        "<noautolink>",
        qq(%STARTFORM{"NmpdrPlugin/search" hidden="$hiddenList"}%),
        "| *Genome* | $dropdown |",
        qq(|  %ENDFORM{"Find Essential"}%  ||),
        "</noautolink>",
    );
    return join("\n", @lines);
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3