[Bio] / FigKernelScripts / svr_function_of.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/svr_function_of.pl

Parent Directory | Revision Log | View Patch

revision 1.2, Thu Oct 29 18:29:51 2009 UTC | revision 1.3, Sat Nov 7 16:56:45 2009 UTC
# Line 1  Line 1 
 #!/usr/bin/perl -w  
1  use strict;  use strict;
2    use Data::Dumper;
3    use Carp;
4    
 use Getopt::Long;  
 use SAPserver;  
   
 #  
 #       This is a SAS Component.  
 #  
5    
6  =head1 svr_function_of  =head1 svr_function_of
7    
8  Get the functional assignment for each specified gene.  Get functions of protein-encoding genes
9    
10  This script takes as input a tab-delimited file with gene IDs at the end of each  ------
11  line. For each gene ID, the functional assignment is appended to the line.  Example: svr_all_features 3702.1 peg | svr_function_of
12    
13  This is a pipe command: the input is taken from the standard input and the output  would produce a 2-column table.  The first column would contain
14  is to the standard output.  PEG IDs for genes occurring in genome 3702.1, and the second
15    would contain the functions of those genes.
16    ------
17    
18  =head2 Command-Line Options  The standard input should be a tab-separated table (i.e., each line
19    is a tab-separated set of fields).  Normally, the last field in each
20    line would contain the PEG for which functions are being requested.
21    If some other column contains the PEGs, use
22    
23  =over 4      -c N
24    
25  =item source  where N is the column (from 1) that contains the PEG in each case.
26    
27  Database source of the IDs specified-- C<SEED> for FIG IDs, C<GENE> for standard  This is a pipe command. The input is taken from the standard input, and the
28  gene identifiers, or C<LocusTag> for locus tags. In addition, you may specify  output is to the standard output.
 C<RefSeq>, C<CMR>, C<NCBI>, C<Trembl>, or C<UniProt> for IDs from those databases.  
 Use C<mixed> to allow mixed ID types (though this may cause problems when the same  
 ID has different meanings in different databases). The default is C<SEED>.  
29    
30  =item url  =head2 Command-Line Options
31    
32  The URL for the Sapling server, if it is to be different from the default.  =item -c Column
33    
34  =back  This is used only if the column containing PEGs is not the last.
35    
36    =head2 Output Format
37    
38    The standard output is a tab-delimited file. It consists of the input
39    file with an extra column added (the function associated with the PEG).
40    
41  =cut  =cut
42    
43  # Parse the command-line options.  use SeedUtils;
44  my $source = 'SEED';  use SAPserver;
45  my $url = '';  my $sapObject = SAPserver->new();
46  my $opted =  GetOptions('source=s' => \$source, 'url=s' => \$url);  
47  if (! $opted) {  my $usage = "usage: svr_function_of [-c column]";
     print "usage: svr_function_of [--source=SEED] [--url=http://...] <input >output\n";  
 } else {  
     # Get the server object.  
     my $sapServer = SAPserver->new(url => $url);  
     # The main loop processes chunks of input, 100 lines at a time.  
     while (! eof STDIN) {  
         # We will build our list of IDs in here.  
         my @ids;  
         # This hash will map each ID to its input line.  
         my %lines;  
         # This will count the lines read in this batch.  
         my $reads = 0;  
         # Loop through the input. We stop at 1000 lines.  
         while ($reads <= 1000 && ! eof STDIN) {  
             # Read the line and trim it.  
             my $line = <STDIN>;  
             chomp $line;  
             # Count the read.  
             $reads++;  
             # Get the feature ID and save it.  
             my @fields = split /\t/, $line;  
             if (scalar @fields) {  
                 my $fid = pop @fields;  
                 push @ids, $fid;  
                 # Save the input line for this ID.  
                 $lines{$fid} = join("\t", @fields, $fid);  
48    
49    my $column;
50    while ($ARGV[0] && ($ARGV[0] =~ /^-/))
51    {
52        $_ = shift @ARGV;
53        if    ($_ =~ s/^-c//) { $column       = ($_ || shift @ARGV) }
54        else                  { die "Bad Flag: $_" }
55    }
56    
57    my @lines = map { chomp; [split(/\t/,$_)] } <STDIN>;
58    if (! $column)  { $column = @{$lines[0]} }
59    my @fids = map { $_->[$column-1] } @lines;
60    
61    my $functions = $sapObject->ids_to_functions(-ids => \@fids);
62    foreach $_ (@lines)
63    {
64        print join("\t",@$_,$functions->{$_->[$column-1]}),"\n";
65              }              }
         }  
         # Ask the server for results.  
         my $document = $sapServer->ids_to_functions(-ids => \@ids,  
                                                     -source => $source,  
                                                     -functions => 1);  
         # Loop through the IDs, producing output.  
         for my $fid (@ids) {  
             # Get this feature's assignment.  
             my $function = $document->{$fid};  
             # Did we get something?  
             if (! $function) {  
                 # No. Write an error notification.  
                 print STDERR "Not found: $fid\n";  
             } else {  
                 # Yes, print the output line.  
                 print "$lines{$fid}\t$function\n";  
             }  
         }  
     }  
 }  
   

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.3

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3