[Bio] / FigKernelScripts / svr_dna_seq.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/svr_dna_seq.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :     use strict;
3 :    
4 :     use Getopt::Long;
5 :     use SAPserver;
6 :     use ScriptThing;
7 :     use SeedUtils;
8 :    
9 :     #
10 :     # This is a SAS Component.
11 :     #
12 :    
13 :     =head1 svr_dna_seq
14 :    
15 :     svr_dna_seq <ids.tbl >sequences.tbl
16 :    
17 : parrello 1.2 Produce DNA strings for contigs, FIG feature IDs, and/or locations.
18 : parrello 1.1
19 :     This script takes as input a tab-delimited file with contig IDs and locations
20 :     at the end of each line. For each one, the appropriate DNA sequence
21 :     is written to the output file. If the C<--fasta> option is specified, the
22 :     sequence is written in FASTA format.
23 :    
24 :     This is a pipe command: the input is taken from the standard input and the
25 :     output to the standard output. The columns of data preceding the ID column will be
26 :     supplied as comments to each FASTA string.
27 :    
28 :     =head2 Command-Line Options
29 :    
30 :     =over 4
31 :    
32 :     =item fasta
33 :    
34 :     If specified, the output sequences will be FASTA format, otherwise just simple character strings.
35 :     The default is FALSE, in which case the output file will look the same as the
36 :     input file but with DNA sequences tacked onto the end of each line.
37 :    
38 :     =item url
39 :    
40 :     The URL for the Sapling server, if it is to be different from the default.
41 :    
42 :     =item c
43 :    
44 :     Column index. If specified, indicates that the input IDs should be taken from the
45 :     indicated column instead of the last column. The first column is column 1.
46 :    
47 :     =back
48 :    
49 :     =cut
50 :    
# Main script body: read batches of IDs/locations from the standard input,
# ask the Sapling server for the DNA of each one, and write the results to
# the standard output, either appended to the input line or as FASTA records.

# Parse the command-line options.
my $column;
my $url;
my $fasta = 0;
my $opted = GetOptions('fasta' => \$fasta, 'c=i' => \$column, 'url=s' => \$url);
if (! $opted) {
    # This is a pipe command, so the usage message must not be mixed into the
    # data on STDOUT, and the failure must be visible in the exit status.
    print STDERR "usage: svr_dna_seq [--fasta] [--url=http://...] [--c=N] <input >output\n";
    exit 1;
}

# Get the server object. An undefined URL is passed through unchanged;
# presumably SAPserver then falls back to its default server -- confirm
# against SAPserver::new.
my $sapServer = SAPserver->new(url => $url);

# The main loop processes chunks of input 10 at a time for DNA. (This is to
# prevent timeouts, because DNA requires serious work.) Each tuple is used
# below as [ID, original input line].
while (my @tuples = ScriptThing::GetBatch(\*STDIN, 10, $column)) {
    # The comment hash is only needed in FASTA mode, where it supplies the
    # comment passed to create_fasta_record for each ID.
    my %comments;
    if ($fasta) {
        %comments = ScriptThing::CommentHash(\@tuples);
    }
    # The Sapling Server method we're using expects a hash of labels to
    # locations, so each input ID serves as both its own label and location.
    my %idHash = map { $_->[0] => $_->[0] } @tuples;
    # Ask the server for results; the returned hash is keyed by label.
    my $document = $sapServer->locs_to_dna(-locations => \%idHash);
    # Loop through the input tuples in their original order, producing output.
    for my $tuple (@tuples) {
        my ($id, $line) = @$tuple;
        my $seq = $document->{$id};
        if (! $seq) {
            # Nothing came back for this ID: report it, but keep going.
            print STDERR "Not found: $id\n";
        } elsif (! $fasta) {
            # Plain mode: echo the input line with the sequence appended.
            print "$line\t$seq\n";
        } else {
            # FASTA mode. create_fasta_record is not defined in this script;
            # presumably it is exported by SeedUtils -- verify.
            print create_fasta_record($id, $comments{$id}, $seq);
        }
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3