[Bio] / Sprout / CorePegs.pl Repository:
ViewVC logotype

View of /Sprout/CorePegs.pl

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.1 - (download) (as text) (annotate)
Wed Feb 6 23:20:20 2008 UTC (12 years, 4 months ago) by parrello
Branch: MAIN
Added to provide feature lists for Andreas.

#!/usr/bin/perl -w

# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
# This file is part of the SEED Toolkit.
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.

=head1 Core Peg List

This is a simple script that creates a tab-delimited list of all the
features for the original core NMPDR organisms. The single positional
parameter is the name of the output file. If no output file is
specified, output will be to the standard output.

The currently-supported command-line options are as follows.

=over 4

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2. Tracing will be directly to the standard output
as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=item phone

Phone number to message when the script is complete.

=item filter

Type of filtering to apply. If C<pegs>, only true PEGs will be included. If C<essential>,
only essential genes will be included. Otherwise, all genes will be included.

=back

=cut




use strict;
use Tracer;
use DocUtils;
use TestUtils;
use Sprout;
use SFXlate;

# Get the command-line options and parameters. The option hash maps each
# option name to a [default, description] pair; the positional-parameter
# help string and the raw argument list follow it.
# NOTE(review): the argument layout (categories, option hash, parameter
# help, @ARGV) follows Tracer::StandardSetup's documented signature --
# confirm against the installed Tracer.pm.
my ($options, @parameters) = StandardSetup([qw(Sprout) ],
                                           {
                                              filter => ["", "filtering type: pegs or essential"],
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                           },
                                           "<outputFile>",
                                           @ARGV);
# Set a variable to contain return type information.
my $rtype;
# Insure we catch errors.
eval {
    # Get the Sprout object.
    my $sprout = SFXlate->new_sprout_only();
    # Check for a file name.
    if ($parameters[0]) {
        # A file was specified, so we redirect the standard output to it.
        Open(\*STDOUT, ">$parameters[0]");
        Trace("Output will be to $parameters[0].") if T(2);
    } else {
        Trace("Standard output will be used.") if T(2);
    }
    # Now we just output the list to the standard output.
    # Get the list of core organism genomes.
    my @genomes = $sprout->RealCoreGenomes();
    # We need to compute the filter clause, the parameters, and the
    # result columns. The base filter is by genome ID (which is the
    # first parameter). The base result column list is the
    # feature ID and assignment. Additional filtering and stuff could be
    # required by the filter option.
    my $filter = "HasFeature(from-link) = ?";
    # The first slot is a placeholder; it is overwritten with the real
    # genome ID on every pass through the genome loop below.
    my @parms = ('genomeID');
    my @cols = ('Feature(id)', 'Feature(assignment)');
    if ($options->{filter} eq 'pegs') {
        # The filter clause is handed to the database layer, so the
        # comparison must use "=" rather than Perl's "eq" operator.
        # NOTE(review): confirm that "Feature(type)" is the correct field
        # name for the feature type in the Sprout database definition.
        $filter .= ' AND Feature(type) = ?';
        push @parms, 'peg';
    } elsif ($options->{filter} eq 'essential') {
        $filter .= ' AND Feature(essential) IS NOT NULL';
        push @cols, 'Feature(essential)';
    }
    # Set up a counter for the features written across all genomes.
    my $totalCount = 0;
    # Loop through the organisms.
    for my $genome (sort @genomes) {
        Trace("Processing $genome.") if T(3);
        # Store the genome ID in the parms.
        $parms[0] = $genome;
        # Get this organism's features according to the filter.
        my $query = $sprout->Get(['HasFeature', 'Feature'], $filter, \@parms);
        # Set up a counter for this genome's features.
        my $genomeCount = 0;
        # Write them to the output file.
        while (my $result = $query->Fetch()) {
            my @fields = $result->Values(\@cols);
            Tracer::PutLine(\*STDOUT, \@fields);
            # Count the feature so the totals traced below are meaningful.
            $genomeCount++;
        }
        # Update the counts.
        Trace("$genomeCount features found for $genome.") if T(3);
        $totalCount += $genomeCount;
    }
    Trace("$totalCount features output.") if T(2);
    # Close the output file.
    close STDOUT;
};
# Record the outcome for the trace log and the optional phone message.
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# Notify the user by phone if a number was supplied.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "Core Peg List terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3