#!/usr/bin/perl -w # # Copyright (c) 2003-2006 University of Chicago and Fellowship # for Interpretations of Genomes. All Rights Reserved. # # This file is part of the SEED Toolkit. # # The SEED Toolkit is free software. You can redistribute # it and/or modify it under the terms of the SEED Toolkit # Public License. # # You should have received a copy of the SEED Toolkit Public License # along with this program; if not write to the University of Chicago # at info@ci.uchicago.edu or the Fellowship for Interpretation of # Genomes at veronika@thefig.info or download a copy from # http://www.theseed.org/LICENSE.TXT. # =head1 Core Peg List This is a simple script that creates a tab-delimited list of all the features for the selected NMPDR organisms. The single positional parameter is the name of the output file. The currently-supported command-line options are as follows. =over 4 =item orgs Organisms whose features are desired. If C, then all organisms will be listed. If C, then all organisms in NMPDR groups will be listed. If C, then only the organisms in the core NMPDR groups will be listed. The default is C. =item user Name suffix to be used for log files. If omitted, the PID is used. =item trace Numeric trace level. A higher trace level causes more messages to appear. The default trace level is 2. Tracing will be directly to the standard output as well as to a CIC<.log> file in the FIG temporary directory, where I is the value of the B option above. =item sql If specified, turns on tracing of SQL activity. =item background Save the standard and error output to files. The files will be created in the FIG temporary directory and will be named CIC<.log> and CIC<.log>, respectively, where I is the value of the B option above. =item h Display this command's parameters and options. =item phone Phone number to message when the script is complete. =item filter Type of filtering to apply. If C, only true PEGs will be included. If C, only essential genes will be included. Otherwise, all genes will be included. =item =back =cut use strict; use Tracer; use DocUtils; use TestUtils; use Sprout; use SFXlate; # Get the command-line options and parameters. my ($options, @parameters) = StandardSetup([qw(Sprout) ], { orgs => ["core", "type of organisms (core, nmpdr, all)"], filter => ["", "filtering type: pegs or essential"], phone => ["", "phone number (international format) to call when load finishes"], }, "", @ARGV); # Set a variable to contain return type information. my $rtype; # Insure we catch errors. eval { # Get the Sprout object. my $sprout = SFXlate->new_sprout_only(); # Now we just output the list to the standard output. # Next, we need to determine the genomes of interest. This # is determined by the "orgs" option. my @genomes = $sprout->CoreGenomes($options->{orgs}); # If no genomes are found, it's an error. my $genomes = scalar @genomes; if (! $genomes) { Confess("No genomes found for orgs option \"$options->{org}\"."); } else { Trace("$genomes genomes will be processed.") if T(2); # Check for a file name. if (! $parameters[0]) { Confess("No output file specified."); } else { # A file was specified, so we open it. my $oh = Open(undef, ">$parameters[0]"); Trace("Output will be to $parameters[0].") if T(2); # We need to compute the filter clause, the parameters, and the # result columns. The base filter is by genome ID (which is the # first parameter). The base result column list is the # feature ID and assignment. Additional filtering and stuff could be # required by the filter option. my $filter = "HasFeature(from-link) = ?"; my @parms = ('genomeID'); my @cols = ('Feature(id)', 'Feature(assignment)'); if ($options->{filter} eq 'pegs') { # Here we filter by feature type to get PEGs only. $filter .= ' AND Feature(type) eq ?'; push @parms, 'peg'; Trace("Filtering for PEGs.") if T(2); } elsif ($options->{filter} eq 'essential') { # Here we filter by the essentiality column. $filter .= ' AND Feature(essential) IS NOT NULL'; push @cols, 'Feature(essential)'; Trace("Filtering for essential genes.") if T(2); } elsif ($options->{filter}) { # Here the filter type is invalid. Confess("Unknown filter type \"$options->{filter}\"."); } # Set up a counter. my $totalCount = 0; # Loop through the organisms. for my $genome (sort @genomes) { Trace("Processing $genome.") if T(3); # Store the genome ID in the parms. $parms[0] = $genome; # Get this organism's features according to the filter. my $query = $sprout->Get(['HasFeature', 'Feature'], $filter, \@parms); # Set up a counter. my $genomeCount = 0; # Write them to the output file. while (my $result = $query->Fetch()) { my @fields = $result->Values(\@cols); Tracer::PutLine($oh, \@fields); $genomeCount++; } # Update the counts. Trace("$genomeCount features found for $genome.") if T(3); $totalCount += $genomeCount; } Trace("$totalCount features output.") if T(2); # Close the output file. close $oh; } } }; if ($@) { Trace("Script failed with error: $@") if T(0); $rtype = "error"; } else { Trace("Script complete.") if T(2); $rtype = "no error"; } if ($options->{phone}) { my $msgID = Tracer::SendSMS($options->{phone}, "Core Peg List terminated with $rtype."); if ($msgID) { Trace("Phone message sent with ID $msgID.") if T(2); } else { Trace("Phone message not sent.") if T(2); } } 1;