[Bio] / Sprout / CorePegs.pl Repository:
ViewVC logotype

View of /Sprout/CorePegs.pl

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.2 - (download) (as text) (annotate)
Thu Feb 14 19:04:15 2008 UTC (12 years, 3 months ago) by parrello
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, rast_rel_2008_06_16, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, rast_rel_2009_05_18, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.1: +73 -51 lines
Added the "orgs" parameter to enable specifying all, all NMPDR, or only core genomes.

#!/usr/bin/perl -w

# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
# This file is part of the SEED Toolkit.
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.

=head1 Core Peg List

This is a simple script that creates a tab-delimited list of all the
features for the selected NMPDR organisms. The single positional
parameter is the name of the output file.

The currently-supported command-line options are as follows.

=over 4

=item orgs

Organisms whose features are desired. If C<all>, then all
organisms will be listed. If C<nmpdr>, then all organisms in
NMPDR groups will be listed. If C<core>, then only the organisms
in the core NMPDR groups will be listed. The default is C<core>.

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2. Trace messages are written directly to the standard
output as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=item phone

Phone number to message when the script is complete.

=item filter

Type of filtering to apply. If C<pegs>, only true PEGs will be included. If C<essential>,
only essential genes will be included. Otherwise, all genes will be included.

=back

=cut

use strict;
use Tracer;
use DocUtils;
use TestUtils;
use Sprout;
use SFXlate;

# Get the command-line options and parameters. Each entry in the option
# table maps an option name to a [default value, help text] pair.
# NOTE(review): the argument layout (tracing categories, option hash ref,
# positional-parameter help string, raw argument list) follows the
# Tracer::StandardSetup convention -- confirm against Tracer.pm.
my ($options, @parameters) = StandardSetup([qw(Sprout) ],
                                           {
                                              orgs => ["core", "type of organisms (core, nmpdr, all)"],
                                              filter => ["", "filtering type: pegs or essential"],
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                           },
                                           "<outputFile>",
                                           @ARGV);
# Set a variable to contain return type information. It is filled in after
# the main processing block and reported in the optional phone message.
my $rtype;
# Insure we catch errors. Everything that can fail runs inside the eval so
# that the completion status can be recorded in $rtype afterward.
eval {
    # Get the Sprout object.
    my $sprout = SFXlate->new_sprout_only();
    # Determine the genomes of interest. This is controlled by the
    # "orgs" option.
    my @genomes = $sprout->CoreGenomes($options->{orgs});
    # If no genomes are found, it's an error.
    my $genomes = scalar @genomes;
    if (! $genomes) {
        # Report the option under its real name ("orgs").
        Confess("No genomes found for orgs option \"$options->{orgs}\".");
    } else {
        Trace("$genomes genomes will be processed.") if T(2);
        # Check for a file name.
        if (! $parameters[0]) {
            Confess("No output file specified.");
        } else {
            # A file was specified, so we open it.
            my $oh = Open(undef, ">$parameters[0]");
            Trace("Output will be to $parameters[0].") if T(2);
            # We need to compute the filter clause, the parameters, and the
            # result columns. The base filter is by genome ID (which is the
            # first parameter). The base result column list is the
            # feature ID and assignment. Additional filtering and stuff could be
            # required by the filter option.
            my $filter = "HasFeature(from-link) = ?";
            # The first parameter is a placeholder; it is overwritten with
            # the real genome ID on each pass through the genome loop.
            my @parms = ('genomeID');
            my @cols = ('Feature(id)', 'Feature(assignment)');
            if ($options->{filter} eq 'pegs') {
                # Here we filter by feature type to get PEGs only. The
                # clause is SQL, so the comparison operator is "=", matching
                # the base filter above (not Perl's "eq").
                $filter .= ' AND Feature(type) = ?';
                push @parms, 'peg';
                Trace("Filtering for PEGs.") if T(2);
            } elsif ($options->{filter} eq 'essential') {
                # Here we filter by the essentiality column and add it to
                # the output columns.
                $filter .= ' AND Feature(essential) IS NOT NULL';
                push @cols, 'Feature(essential)';
                Trace("Filtering for essential genes.") if T(2);
            } elsif ($options->{filter}) {
                # Here the filter type is invalid.
                Confess("Unknown filter type \"$options->{filter}\".");
            }
            # Set up a counter for the features written across all genomes.
            my $totalCount = 0;
            # Loop through the organisms.
            for my $genome (sort @genomes) {
                Trace("Processing $genome.") if T(3);
                # Store the genome ID in the parms.
                $parms[0] = $genome;
                # Get this organism's features according to the filter.
                my $query = $sprout->Get(['HasFeature', 'Feature'], $filter, \@parms);
                # Set up a counter for this genome's features.
                my $genomeCount = 0;
                # Write them to the output file.
                while (my $result = $query->Fetch()) {
                    my @fields = $result->Values(\@cols);
                    Tracer::PutLine($oh, \@fields);
                    # Count the feature so the trace totals are meaningful.
                    $genomeCount++;
                }
                # Update the counts.
                Trace("$genomeCount features found for $genome.") if T(3);
                $totalCount += $genomeCount;
            }
            Trace("$totalCount features output.") if T(2);
            # Close the output file. Buffered write errors only surface
            # here, so a failed close is treated as a script failure.
            close $oh or Confess("Error closing output file $parameters[0]: $!");
        }
    }
};
# Record the outcome; $@ is examined immediately after the eval.
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# If a phone number was supplied, send a text message reporting how the
# script ended. A false message ID is traced as the message not being sent.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "Core Peg List terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3