[Bio] / FigKernelScripts / svr_ids_to_figfams.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/svr_ids_to_figfams.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :     use strict;
3 :    
4 :     use Getopt::Long;
5 :     use SAPserver;
6 : parrello 1.3 use ScriptThing;
7 : parrello 1.1
8 :     #
9 :     # This is a SAS Component.
10 :     #
11 :    
12 :     =head1 svr_ids_to_figfams
13 :    
14 : overbeek 1.8 svr_ids_to_figfams <gene_ids.tbl >figfam_data.tbl 2> lines.from.input.file.that.have.no.figfam
15 : parrello 1.5
16 : disz 1.7 List the FIGfams for each specified gene ID on STDOUT. List on STDERR those input lines whose ID does not have a FIGfam.
17 : parrello 1.1
18 :     This script takes as input a tab-delimited file with gene IDs at the end of each
19 : overbeek 1.8 line. For each gene ID, the FIGfam containing the gene and that FIGfam's associated function
20 :     are appended to the line. The function is placed first and the FIGfam ID follows.
21 : parrello 1.1
22 : parrello 1.2 If the C<--idsOnly> option is specified, then the original input is discarded, and
23 :     only the FIGfam ID and the role will be output. Duplicate FIGfam IDs will be
24 :     conflated, so that you get a complete list of the FIGfams covered by the input
25 :     genes.
26 :    
27 : parrello 1.1 This is a pipe command: the input is taken from the standard input and the output
28 :     is to the standard output.
29 :    
30 :     Note that because some genes belong to multiple FIGfams, there may be more
31 :     output items than input lines.
32 :    
33 :     =head2 Command-Line Options
34 :    
35 :     =over 4
36 :    
37 : parrello 1.2 =item idsOnly
38 :    
39 :     If specified, only the roles and IDs of the FIGfams found will be output, and duplicate IDs
40 :     will be conflated. Use this to get a list of all the FIGfams for a specified
41 :     list of genes.
42 :    
43 : parrello 1.1 =item source
44 :    
45 :     Database source of the IDs specified-- C<SEED> for FIG IDs, C<GENE> for standard
46 :     gene identifiers, or C<LocusTag> for locus tags. In addition, you may specify
47 :     C<RefSeq>, C<CMR>, C<NCBI>, C<Trembl>, or C<UniProt> for IDs from those databases.
48 :     Use C<mixed> to allow mixed ID types (though this may cause problems when the same
49 : parrello 1.3 ID has different meanings in different databases). Use C<prefixed> to allow IDs with
50 :     prefixes indicating the ID type (e.g. C<uni|P00934> for a UniProt ID, C<gi|135813> for
51 :     an NCBI identifier, and so forth). The default is C<SEED>.
52 : parrello 1.1
53 :     =item url
54 :    
55 :     The URL for the Sapling server, if it is to be different from the default.
56 :    
57 : parrello 1.6 =item c
58 :    
59 :     Column index. If specified, indicates that the input IDs should be taken from the
60 :     indicated column instead of the last column. The first column is column 1.
61 :    
62 : parrello 1.1 =back
63 :    
64 :     =cut
65 :    
66 :     # Parse the command-line options, starting from the documented defaults.
67 :     my $source = 'SEED';
68 :     my $url = '';
69 : parrello 1.2 my $idsOnly = '';
70 : parrello 1.6 my $column = 0;
71 : parrello 1.2 my $opted = GetOptions('idsOnly' => \$idsOnly, 'source=s' => \$source,
72 : parrello 1.6 'url=s' => \$url, 'c=i' => \$column);
73 : parrello 1.1 if (! $opted) {
74 : parrello 1.6 print "usage: svr_ids_to_figfams [--idsOnly] [--c=N] [--source=SEED] [--url=http://...] <input >output\n";
75 : parrello 1.1 } else {
76 : parrello 1.2 # Get the server object.
77 :     my $sapServer = SAPserver->new(url => $url);
78 :     # In ids-only mode, this hash maps each FIGfam ID found to its role.
79 :     my %figFams;
80 : parrello 1.3 # The main loop reads the input in batches of [id, line] tuples (GetBatch default size, presumably 1000 -- confirm).
81 : parrello 1.6 while (my @tuples = ScriptThing::GetBatch(\*STDIN, undef, $column)) {
82 : parrello 1.2 # Ask the server for the FIGfams, with functions, of every ID in this batch.
83 : parrello 1.3 my $document = $sapServer->ids_to_figfams(-ids => [map { $_->[0] } @tuples],
84 : parrello 1.2 -source => $source,
85 :     -functions => 1);
86 :     # Loop through the IDs, producing output.
87 : parrello 1.3 for my $tuple (@tuples) {
88 : parrello 1.4 my ($id, $line) = @$tuple;
89 : parrello 1.2 # Get this feature's FIGfam data.
90 : parrello 1.3 my $results = $document->{$id};
91 : parrello 1.2 # Did we get something?
92 :     if (! $results) {
93 :     # No. Echo the unmatched input line to STDERR.
94 : disz 1.7 print STDERR "$line\n";
95 : parrello 1.2 } else {
96 :     # Loop through the results for this ID.
97 :     for my $result (@$results) {
98 :     # Get the FIGfam ID and role (in that order).
99 :     my ($figfam, $role) = @$result;
100 :     # Is this ids-only mode?
101 :     if ($idsOnly) {
102 :     # Yes, remember the ID and its role; a repeated ID collapses to one entry.
103 :     $figFams{$figfam} = $role;
104 :     } else {
105 :     # No, print the input line with the role and then the FIGfam ID appended.
106 : parrello 1.3 print "$line\t$role\t$figfam\n";
107 : parrello 1.1 }
108 :     }
109 :     }
110 :     }
111 : parrello 1.2 }
112 :     # We're all done. In IDs-only mode, this is where we output the
113 :     # result: one line per FIGfam, role first and then ID, sorted by ID.
114 :     if ($idsOnly) {
115 :     for my $figFam (sort keys %figFams) {
116 :     print "$figFams{$figFam}\t$figFam\n";
117 :     }
118 : parrello 1.1 }
119 :     }
120 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3