[Bio] / FigKernelScripts / svr_upstream.pl Repository:
ViewVC logotype

View of /FigKernelScripts/svr_upstream.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Jul 15 23:36:53 2009 UTC (10 years, 8 months ago) by parrello
Branch: MAIN
New scripts for operon pipeline.

#!/usr/bin/perl -w
use strict;

use Getopt::Long;
use Pod::Usage;
use SAPserver;

#
#	This is a SAS Component.
#

=head1 svr_upstream

Retrieve upstream regions from the Sapling Server.

This script takes as input a tab-delimited file with feature IDs at the end. For
each feature ID, the upstream DNA is computed and written to the output file in
FASTA format. Sections of DNA that occur inside a feature are shown in upper
case. DNA between known features is shown in lower case.

=head2 Command-Line Options

=over 4

=item skipGene

If specified, then only the upstream region is output. Otherwise, the upstream
region and the feature interior are output together.

=item size

Number of base pairs to show in the upstream region. The default is C<200>.

=item url

The URL for the sapling server, if it is to be different from the default.

=back

=cut

# Parse the command-line options.
my $skipGene = '';
my $size = 200;
my $url = 'http://bio-macpro-1.mcs.anl.gov/~parrello/FIG/sap_server.cgi';
my $opted =  GetOptions('skipGene' => \$skipGene, 'size=i' => \$size, 'url' => \$url);
if (! $opted) {
    print "usage: sap_upstream [--skipGene] [--size=200] [-url=http://...] <input >output\n";
} else {
    # Protect from errors.
    eval {
        # Get the server object.
        my $sapServer = SAPserver->new(url => $url);
        # The main loop processes chunks of input, 100 lines at a time.
        while (! eof STDIN) {
            # We will build our list of IDs in here.
            my @ids;
            # This hash will map each ID to its extra fields.
            my %comments;
            # This will count the lines read in this batch.
            my $reads = 0;
            # Loop through the input. We stop at 100 lines.
            while ($reads <= 100 && ! eof STDIN) {
                # Read the line and trim it.
                my $line = <STDIN>;
                chomp $line;
                # Count the read.
                $reads++;
                # Get the feature ID and save it.
                my @fields = split /\t/, $line;
                if (scalar @fields) {
                    my $fid = pop @fields;
                    push @ids, $fid;
                    $comments{$fid} = join(" ", @fields);
                }
            }
            # Ask the server for results.
            my $document = $sapServer->upstream(ids => \@ids, size => $size,
                                                comments => \%comments,
                                                skipGene => $skipGene);
            # Loop through the IDs, producing output.
            for my $fid (@ids) {
                # Get this feature's FASTA.
                my $fasta = $document->{$fid};
                # Did we get something?
                if (! $fasta) {
                    # No. Write an error notification.
                    print STDERR "Not found: $fid\n";
                } else {
                    # Yes. output the FASTA.
                    print "$fasta";
                }
            }
        }
    };
    if ($@) {
        print STDERR "FATAL ERROR: $@\n";
    }
}


1;


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3