[Bio] / FigKernelScripts / svr_gene_data.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/svr_gene_data.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :     use strict;
3 :    
4 :     use Getopt::Long;
5 :     use SAPserver;
6 :     use ScriptThing;
7 :    
8 :     #
9 :     # This is a SAS Component.
10 :     #
11 :    
12 :     =head1 svr_gene_data
13 :    
14 : parrello 1.2 svr_gene_data fld1 fld2 ... fldN <gene_ids.tbl >gene_data.tbl
15 :    
16 : parrello 1.1 Get one or more pieces of data about each specified gene.
17 :    
18 :     This script takes as input a tab-delimited file with gene IDs at the end of each
19 :     line. For each gene ID, one or more selected data items are appended to each line.
20 :    
21 :     This is a pipe command: the input is taken from the standard input and the output
22 :     is to the standard output.
23 :    
24 :     The data items are specified as positional parameters on the command line, and
25 :     are appended in the order specified to the output lines. The permissible data items
26 :     are as follows.
27 :    
28 :     If a single identifier refers to multiple genes, there will be one output line for
29 :     each gene.
30 :    
31 :     =over 4
32 :    
33 :     =item evidence
34 :    
35 :     Comma-delimited list of evidence codes indicating the reason for the gene's
36 :     current assignment.
37 :    
38 :     =item fig-id
39 :    
40 :     The FIG ID of the gene.
41 :    
42 :     =item function
43 :    
44 :     Current functional assignment.
45 :    
46 :     =item genome-name
47 :    
48 :     Name of the genome containing the gene.
49 :    
50 :     =item length
51 :    
52 :     Number of base pairs in the gene.
53 :    
54 :     =item location
55 :    
56 :     Comma-delimited list of location strings indicating the location of the gene
57 :     in the genome. A location string consists of a contig ID, an underscore, the
58 :     starting offset, the strand (C<+> or C<->), and the number of base pairs.
59 :    
60 :     =item publications
61 :    
62 :     Comma-delimited list of PUBMED IDs for publications relating to the gene.
63 :    
64 :     =back
65 :    
66 :     =head2 Command-Line Options
67 :    
68 :     =over 4
69 :    
70 :     =item source
71 :    
72 :     Database source of the IDs specified-- C<SEED> for FIG IDs, C<GENE> for standard
73 :     gene identifiers, or C<LocusTag> for locus tags. In addition, you may specify
74 :     C<RefSeq>, C<CMR>, C<NCBI>, C<Trembl>, or C<UniProt> for IDs from those databases.
75 :     Use C<mixed> to allow mixed ID types (though this may cause problems when the same
76 :     ID has different meanings in different databases). Use C<prefixed> to allow IDs with
77 :     prefixes indicating the ID type (e.g. C<uni|P00934> for a UniProt ID, C<gi|135813> for
78 :     an NCBI identifier, and so forth). The default is C<SEED>.
79 :    
80 :     =item url
81 :    
82 :     The URL for the Sapling server, if it is to be different from the default.
83 :    
84 :     =back
85 :    
86 :     =cut
87 :    
# Main script body: read batches of input lines, ask the Sapling server for the
# requested data items for each gene ID, and write each input line back out
# with the data appended (one output line per gene returned for the ID).

# Parse the command-line options.
#   --source  type of the incoming gene IDs (defaults to SEED)
#   --url     Sapling server URL; left empty unless the user overrides it
my $source = 'SEED';
my $url = '';
my $opted = GetOptions('source=s' => \$source, 'url=s' => \$url);
if (! $opted) {
    # This is a pipe command, so standard output is the data stream. The usage
    # text goes to STDERR to keep it out of downstream input, and the nonzero
    # exit status lets the caller detect the failure.
    print STDERR "usage: svr_gene_data [--source=SEED] [--url=http://...] [evidence | fig-id | function | genome-name | length | location | publications] ... <input >output\n";
    exit(1);
}

# The remaining positional parameters are the output field names, in the
# order they are to be appended to each line.
my @outputs = @ARGV;

# Get the server object.
my $sapServer = SAPserver->new(url => $url);

# The main loop processes chunks of input. Each tuple is unpacked below as
# [id, line]; the batching itself is handled by ScriptThing::GetBatch
# (NOTE(review): batch size and ID-column selection are decided there).
while (my @tuples = ScriptThing::GetBatch(\*STDIN)) {
    # Ask the server for results for every ID in this batch at once.
    my @ids = map { $_->[0] } @tuples;
    my $document = $sapServer->ids_to_data(-ids => \@ids,
                                           -source => $source,
                                           -data => \@outputs);
    # Loop through the input tuples in their original order, producing output.
    for my $tuple (@tuples) {
        my ($id, $line) = @$tuple;
        # The result is keyed by input ID; each entry is a list of data rows.
        my $featureData = $document->{$id};
        if (! $featureData) {
            # Nothing came back for this ID. Report it on STDERR so the
            # output stream contains only data lines.
            print STDERR "Not found: $id\n";
        } else {
            # One output line per returned row: the original input line
            # followed by the requested data items, tab-delimited.
            for my $row (@$featureData) {
                print join("\t", $line, @$row) . "\n";
            }
        }
    }
}
123 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3