Parent Directory
|
Revision Log
Added the "orgs" parameter to enable specifying all, all NMPDR, or only core genomes.
#!/usr/bin/perl -w # # Copyright (c) 2003-2006 University of Chicago and Fellowship # for Interpretations of Genomes. All Rights Reserved. # # This file is part of the SEED Toolkit. # # The SEED Toolkit is free software. You can redistribute # it and/or modify it under the terms of the SEED Toolkit # Public License. # # You should have received a copy of the SEED Toolkit Public License # along with this program; if not write to the University of Chicago # at info@ci.uchicago.edu or the Fellowship for Interpretation of # Genomes at veronika@thefig.info or download a copy from # http://www.theseed.org/LICENSE.TXT. # =head1 Core Peg List This is a simple script that creates a tab-delimited list of all the features for the selected NMPDR organisms. The single positional parameter is the name of the output file. The currently-supported command-line options are as follows. =over 4 =item orgs Organisms whose features are desired. If C<all>, then all organisms will be listed. If C<nmpdr>, then all organisms in NMPDR groups will be listed. If C<core>, then only the organisms in the core NMPDR groups will be listed. The default is C<core>. =item user Name suffix to be used for log files. If omitted, the PID is used. =item trace Numeric trace level. A higher trace level causes more messages to appear. The default trace level is 2. Tracing will be directly to the standard output as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory, where I<User> is the value of the B<user> option above. =item sql If specified, turns on tracing of SQL activity. =item background Save the standard and error output to files. The files will be created in the FIG temporary directory and will be named C<err>I<User>C<.log> and C<out>I<User>C<.log>, respectively, where I<User> is the value of the B<user> option above. =item h Display this command's parameters and options. =item phone Phone number to message when the script is complete. =item filter Type of filtering to apply. If C<pegs>, only true PEGs will be included. If C<essential>, only essential genes will be included. Otherwise, all genes will be included. =item =back =cut use strict; use Tracer; use DocUtils; use TestUtils; use Sprout; use SFXlate; # Get the command-line options and parameters. my ($options, @parameters) = StandardSetup([qw(Sprout) ], { orgs => ["core", "type of organisms (core, nmpdr, all)"], filter => ["", "filtering type: pegs or essential"], phone => ["", "phone number (international format) to call when load finishes"], }, "<fileName>", @ARGV); # Set a variable to contain return type information. my $rtype; # Insure we catch errors. eval { # Get the Sprout object. my $sprout = SFXlate->new_sprout_only(); # Now we just output the list to the standard output. # Next, we need to determine the genomes of interest. This # is determined by the "orgs" option. my @genomes = $sprout->CoreGenomes($options->{orgs}); # If no genomes are found, it's an error. my $genomes = scalar @genomes; if (! $genomes) { Confess("No genomes found for orgs option \"$options->{org}\"."); } else { Trace("$genomes genomes will be processed.") if T(2); # Check for a file name. if (! $parameters[0]) { Confess("No output file specified."); } else { # A file was specified, so we open it. my $oh = Open(undef, ">$parameters[0]"); Trace("Output will be to $parameters[0].") if T(2); # We need to compute the filter clause, the parameters, and the # result columns. The base filter is by genome ID (which is the # first parameter). The base result column list is the # feature ID and assignment. Additional filtering and stuff could be # required by the filter option. my $filter = "HasFeature(from-link) = ?"; my @parms = ('genomeID'); my @cols = ('Feature(id)', 'Feature(assignment)'); if ($options->{filter} eq 'pegs') { # Here we filter by feature type to get PEGs only. $filter .= ' AND Feature(type) eq ?'; push @parms, 'peg'; Trace("Filtering for PEGs.") if T(2); } elsif ($options->{filter} eq 'essential') { # Here we filter by the essentiality column. $filter .= ' AND Feature(essential) IS NOT NULL'; push @cols, 'Feature(essential)'; Trace("Filtering for essential genes.") if T(2); } elsif ($options->{filter}) { # Here the filter type is invalid. Confess("Unknown filter type \"$options->{filter}\"."); } # Set up a counter. my $totalCount = 0; # Loop through the organisms. for my $genome (sort @genomes) { Trace("Processing $genome.") if T(3); # Store the genome ID in the parms. $parms[0] = $genome; # Get this organism's features according to the filter. my $query = $sprout->Get(['HasFeature', 'Feature'], $filter, \@parms); # Set up a counter. my $genomeCount = 0; # Write them to the output file. while (my $result = $query->Fetch()) { my @fields = $result->Values(\@cols); Tracer::PutLine($oh, \@fields); $genomeCount++; } # Update the counts. Trace("$genomeCount features found for $genome.") if T(3); $totalCount += $genomeCount; } Trace("$totalCount features output.") if T(2); # Close the output file. close $oh; } } }; if ($@) { Trace("Script failed with error: $@") if T(0); $rtype = "error"; } else { Trace("Script complete.") if T(2); $rtype = "no error"; } if ($options->{phone}) { my $msgID = Tracer::SendSMS($options->{phone}, "Core Peg List terminated with $rtype."); if ($msgID) { Trace("Phone message sent with ID $msgID.") if T(2); } else { Trace("Phone message not sent.") if T(2); } } 1;
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |