[Bio] / FigKernelScripts / svr_genome_functions.pl Repository:
ViewVC logotype

View of /FigKernelScripts/svr_genome_functions.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Sun Aug 29 16:01:20 2010 UTC (9 years, 2 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.2: +1 -1 lines
sort by feature id

#!/usr/bin/perl -w
use strict;

use Getopt::Long;
use SAPserver;
use ScriptThing;
use SeedEnv;

#
#	This is a SAS Component.
#

=head1 svr_genome_functions

    svr_genome_functions genome >genes.tbl

List the location and functional assignment for each gene in a specified genome.

This script takes a single genome ID as a positional parameter and produces
a three-column tab-delimited table containing each gene ID, its
L<SAP/Location String>, and its functional assignment. The table is written
to the standard output.

=head2 Command-Line Options

=over 4

=item url

The URL for the Sapling server, if it is to be different from the default.

=back

=cut

# Parse the command-line options.
my $url;
my $opted = GetOptions('url=s' => \$url);
# Get the genome ID. It is the first positional parameter remaining after
# the options have been parsed.
my $genomeID = $ARGV[0];
# Check for errors. The usage message goes to STDERR and the script exits
# with a nonzero status, so that a bad invocation neither writes the message
# into a redirected output file nor looks like success to a calling script.
if (! $opted || ! $genomeID) {
    print STDERR "usage: svr_genome_functions [--url=http://...] genomeID >output\n";
    exit(1);
}
# Get the server object.
my $sapServer = SAPserver->new(url => $url);
# Get the list of genes in this genome. The result is a hash keyed by
# genome ID; each value is a reference to a list of feature IDs.
my $genomeHash = $sapServer->all_features(-ids => $genomeID);
my $featureList = $genomeHash->{$genomeID};
# Fail with a readable message if the server returned nothing for this
# genome, rather than dying on an undefined array reference.
if (! $featureList) {
    die "No features were returned for genome $genomeID.\n";
}
my @geneList = sort { SeedUtils::by_fig_id($a, $b) } @$featureList;
# The main loop processes the genes in chunks of up to 1000 at a time.
# Each tuple in a batch is used as [feature ID, output text].
while (my @tuples = ScriptThing::GetBatch(\@geneList, 1000)) {
    # Get the location and function for each ID found.
    my $fidHash = $sapServer->ids_to_data(-ids => [map { $_->[0] } @tuples],
                                          -data => ['location', 'function']);
    # Loop through the IDs, producing output.
    for my $tuple (sort { SeedUtils::by_fig_id($a->[0], $b->[0]) } @tuples) {
        # Get the ID and the line.
        my ($id, $line) = @$tuple;
        # Get this feature's location and function. We spend a little effort
        # to ensure we can recover if no result was found: the result list
        # may be missing or empty, and either field may be undefined.
        my ($loc, $function);
        my $locData = $fidHash->{$id};
        if ($locData && @$locData && $locData->[0]) {
            ($loc, $function) = @{$locData->[0]};
        }
        # Apply the defaults after unpacking, so that an undefined field in
        # the server's tuple cannot overwrite them.
        $loc = '' if ! defined $loc;
        $function = 'unknown' if ! defined $function;
        # Print the result.
        print join("\t", $line, $loc, $function) . "\n";
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3