[Bio] / FigKernelPackages / FFserver.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FFserver.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.19, Fri Aug 21 21:58:08 2009 UTC revision 1.20, Thu Oct 29 18:26:59 2009 UTC
# Line 1  Line 1 
   
1  package FFserver;  package FFserver;
2    
3  #  #
4  # This is a SAS Component  # This is a SAS Component
5  #  #
6    
7    =head1 FIGfam Server Helper Object
8    
9    This module is used to call the FIGfam server, which is a general-purpose
10    server for extracting data from the FIGfams database. Each FIGfam server
11    function correspond to a method of this object.
12    
13    This package deliberately uses no internal SEED packages or scripts, only common
14    PERL modules.
15    
16    =cut
17    
18  use LWP::UserAgent;  use LWP::UserAgent;
19  use Data::Dumper;  use Data::Dumper;
20  use YAML;  use YAML;
# Line 27  Line 37 
37      return bless $self, $class;      return bless $self, $class;
38  }  }
39    
40    =head2 Functions
41    
42    =head3 members_of_families
43    
44        my $document = $ffObject->members_of_families(@ids);
45    
46    Return the function and a list of the members for each specified family.
47    
48    =over 4
49    
50    =item ids
51    
52    A list of FIGfam IDs.
53    
54    =item RETURN
55    
56    Returns a reference to a list of 3-tuples. Each 3-tuple will consist of a FIGfam
57    family ID followed by the family's function and a sub-list of all the FIG feature
58    IDs for the features in the family.
59    
60    =back
61    
62    =cut
63    
64  sub members_of_families  sub members_of_families
65  {  {
66      my($self, @ids) = @_;      my($self, @ids) = @_;
67      return $self->run_query('members_of_families', @ids);      return $self->run_query('members_of_families', @ids);
68  }  }
69    
70    =head3 families_containing_peg
71    
72        my $document = $ffObject->families_containing_peg(@ids);
73    
74    Return the FIGfams containing the specified features.
75    
76    =over 4
77    
78    =item ids
79    
80    A list of FIG feature IDs.
81    
82    =item RETURN
83    
84    Returns a list of 2-tuples, each consisting of an incoming feature ID
85    followed by a list of FIGfam IDs for the families containing the incoming
86    feature.
87    
88    =back
89    
90    =cut
91    
92  sub families_containing_peg  sub families_containing_peg
93  {  {
94      my($self, @ids) = @_;      my($self, @ids) = @_;
95      return $self->run_query('families_containing_peg', @ids);      return $self->run_query('families_containing_peg', @ids);
96  }  }
97    
98    =head3 function_of
99    
100        my $document = $ffObject->function_of(@ids);
101    
102    Return the current functional assignment for each specified feature.
103    
104    =over 4
105    
106    =item ids
107    
108    A list of FIG feature IDs.
109    
110    =item RETURN
111    
112    Returns a list of 2-tuples, each consisting of an incoming feature ID
113    followed by the feature's functional assignment, or an undefined value
114    if the feature is not found in the FIGfam database.
115    
116    =back
117    
118    =cut
119    
120  sub function_of  sub function_of
121  {  {
122      my($self, @ids) = @_;      my($self, @ids) = @_;
123      return $self->run_query('function_of', @ids);      return $self->run_query('function_of', @ids);
124  }  }
125    
126    =head3 org_of
127    
128        my $document = $ffObject->org_of(@ids);
129    
130    Return the name of the genome containing each specified feature.
131    
132    =over 4
133    
134    =item ids
135    
136    A list of FIG feature IDs.
137    
138    =item RETURN
139    
140    Returns a list of 2-tuples, each consisting of an incoming feature ID
141    followed by the name of the genome containing the feature, or an undefined value
142    if the feature is not found in the FIGfam database.
143    
144    =back
145    
146    =cut
147    
148  sub org_of  sub org_of
149  {  {
150      my($self, @ids) = @_;      my($self, @ids) = @_;
151      return $self->run_query('org_of', @ids);      return $self->run_query('org_of', @ids);
152  }  }
153    
154    =head3 seq_of
155    
156        my $document = $ffObject->seq_of(@ids);
157    
158    Return the protein sequence for each specified feature.
159    
160    =over 4
161    
162    =item ids
163    
164    A list of FIG feature IDs.
165    
166    =item RETURN
167    
168    Returns a list of 2-tuples, each consisting of an incoming feature ID
169    followed by the protein sequence for the feature, or an undefined value
170    if the feature is not found in the FIGfam database.
171    
172    =back
173    
174    =cut
175    
176  sub seq_of  sub seq_of
177  {  {
178      my($self, @ids) = @_;      my($self, @ids) = @_;
179      return $self->run_query('seq_of', @ids);      return $self->run_query('seq_of', @ids);
180  }  }
181    
182    =head3 aliases_of
183    
184        my $document = $ffObject->aliases_of(@ids);
185    
186    Return the aliases for each specified feature.
187    
188    =over 4
189    
190    =item ids
191    
192    A list of FIG feature IDs.
193    
194    =item RETURN
195    
196    Returns a list of 2-tuples, each consisting of an incoming feature ID
197    followed by a comma-delimited list of the aliases for the feature, or an
198    undefined value if the feature is not found in the FIGfam database.
199    
200    =back
201    
202    =cut
203    
204  sub aliases_of  sub aliases_of
205  {  {
206      my($self, @ids) = @_;      my($self, @ids) = @_;
207      return $self->run_query('aliases_of', @ids);      return $self->run_query('aliases_of', @ids);
208  }  }
209    
210    =head3 families_implementing_role
211    
212        my $document = $ffObject->families_implementing_role(@roles);
213    
214    Return the FIGfams that implement the specified roles. Each FIGfam has
215    a single function associated with it, but the function may involve
216    multiple roles, or may include comments. The role is therefore a more
217    compact string than the function.
218    
219    =over 4
220    
221    =item roles
222    
223    A list of role names.
224    
225    =item RETURN
226    
227    Returns a list of 2-tuples, each consisting of an incoming role name
228    followed by a list of FIGfam IDs for the families that implement the
229    incoming role.
230    
231    =back
232    
233    =cut
234    
235  sub families_implementing_role  sub families_implementing_role
236  {  {
237      my($self,@roles) = @_;      my($self,@roles) = @_;
238      return $self->run_query('families_implementing_role', @roles);      return $self->run_query('families_implementing_role', @roles);
239  }  }
240    
241    =head3 families_with_function
242    
243        my $document = $ffObject->families_with_function(@functions);
244    
245    Return the FIGfams that belong to the specified functions. Each FIGfam has
246    a single function associated with it, but the function may involve
247    multiple roles, or may include comments. The function is therefore a
248    more specific string than the role.
249    
250    =over 4
251    
252    =item functions
253    
254    A list of functional roles.
255    
256    =item RETURN
257    
258    Returns a list of 2-tuples, each consisting of an incoming role name
259    followed by a list of FIGfam IDs for the families associated with the
260    incoming function.
261    
262    =back
263    
264    =cut
265    
266  sub families_with_function  sub families_with_function
267  {  {
268      my($self,@functions) = @_;      my($self,@functions) = @_;
269      return $self->run_query('families_with_function', @functions);      return $self->run_query('families_with_function', @functions);
270  }  }
271    
272    =head3 families_in_genome
273    
274        my $document = $ffObject->families_in_genome(@genomes);
275    
276    Return the FIGfams that have members in the specified genomes.
277    
278    =over 4
279    
280    =item genomes
281    
282    A list of genome IDs.
283    
284    =item RETURN
285    
286    Returns a list of 2-tuples, each consisting of an incoming genome ID
287    followed by a list of FIGfam IDs for the families that have members in
288    that genome.
289    
290    =back
291    
292    =cut
293    
294  sub families_in_genome  sub families_in_genome
295  {  {
296      my($self,@genomes) = @_;      my($self,@genomes) = @_;
297      return $self->run_query('families_in_genome', @genomes);      return $self->run_query('families_in_genome', @genomes);
298  }  }
299    
300    =head3 get_subsystem_based_figfams
301    
302        my $document = $ffObject->get_subsystem_based_figfams();
303    
304    Return a list of the FIGfams derived from subsystems.
305    
306    =over 4
307    
308    =item RETURN
309    
310    Returns a reference to a list of the IDs for the FIGfams derived from subsystems.
311    
312    =back
313    
314    =cut
315    
316  sub get_subsystem_based_figfams  sub get_subsystem_based_figfams
317  {  {
318      my ($self) = @_;      my ($self) = @_;
319      return $self->run_query('get_subsystem_based_figfams');      return $self->run_query('get_subsystem_based_figfams');
320  }  }
321    
322    =head3 should_be_member
323    
324        my $document = $ffObject->should_be_member(@id_seq_pairs);
325    
326    Determine whether a particular protein sequence belongs in a particular
327    FIGfam. This method takes as input multiple FIGfam/sequence pairs and
328    performs a determination for each.
329    
330    =over 4
331    
332    =item id_seq_pairs
333    
334    A list of 2-tuples, each consisting of a FIGfam ID followed
335    by a protein sequence string.
336    
337    =item RETURN
338    
339    Returns a reference to a list of boolean flags, one per input pair. For each
340    input pair, the flag will be C<1> if the sequence should be in the FIGfam and
341    C<0> otherwise.
342    
343    =back
344    
345    =cut
346    
347  sub should_be_member  sub should_be_member
348  {  {
349      my($self, @id_seq_pairs) = @_;      my($self, @id_seq_pairs) = @_;
350      return $self->run_query('should_be_member', @id_seq_pairs);      return $self->run_query('should_be_member', @id_seq_pairs);
351  }  }
352    
353    =head3 all_families
354    
355        my $document = $ffObject->all_families();
356    
357    Return a list of the IDs for all the FIGfams in the system.
358    
359    =over 4
360    
361    =item RETURN
362    
363    Returns a reference to a list of the IDs for all the FIGfams in the system.
364    
365    =back
366    
367    =cut
368    
369  sub all_families  sub all_families
370  {  {
371      my($self) = @_;      my($self) = @_;
372      return $self->run_query('all_families');      return $self->run_query('all_families');
373  }  }
374    
375  sub run_query  =head3 assign_function_to_prot
 {  
     my($self, $function, @args ) = @_;  
     my $form = [function  => $function,  
                 args => YAML::Dump(@args),  
                 ];  
     return $self->run_query_form($form);  
 }  
376    
377  sub run_query_form      my $document = $ffObject->assign_function_to_prot($input, $blast, $min_hits, $assignToAll);
 {  
     my($self, $form, $raw) = @_;  
378    
379      my $res = $self->{ua}->post($self->{server_url}, $form);  For each incoming protein sequence, attempt to place it in a FIGfam. If a
380    suitable FIGfam can be found for a particular sequence, the FIGfam ID and
381    its functional assignment will be returned.
382    
383      if ($res->is_success)  =over 4
     {  
         my $content = $res->content;  
         if ($raw)  
         {  
             return $content;  
         }  
384    
385  #       print "Got $content\n";  =item input
386          my $ret;  
387          eval {  Either (1) an open input handle to a file containing the proteins in FASTA format,
388              $ret = Load($content);  or (2) a reference to a list of FASTA strings for the proteins.
389          };  
390          if ($@)  =item blast
391          {  
392              die "Query returned unparsable content ($@): " . $content;  If nonzero, then when a protein is placed into a FIGfam, a BLAST will be performed
393          }  afterward, and the top I<N> hits (where I<N> is the value of this parameter)
394          return $ret;  will be returned as part of the protein's output tuple.
395      }  
396      else  =item min_hits
397      {  
398          die "error on post " . $res->status_line . " " . $res->content;  A number from 1 to 10, indicating the minimum number of matches required to
399      }  consider a protein as a candidate for assignment to a FIGfam. A higher value
400  }  indicates a more reliable matching algorithm; the default is C<3>.
401    
402    =item assign_to_all
403    
404    If TRUE, then if the standard matching algorithm fails to assign a protein,
405    a BLAST will be used. The BLAST is slower, but is capable of placing more
406    proteins than the normal algorithm.
407    
408    =item RETURN
409    
410    Returns a Result Handler. Call C<get_next> on the result handler to get back a data
411    item. Each item sent back by the result handler is a 2-tuple containing the
412    incoming protein sequence and a reference to a list consisting of the proposed
413    functional assignment for the protein, the name of the Genome Set from which the
414    protein is likely to have originated (if known), a list of BLAST hits (if
415    requested), and the number of matches for the protein found in the FIGfam. If no
416    assignment could be made for a particular protein, it will not appear in the
417    output stream.
418    
419    =back
420    
421    =cut
422    
423  sub assign_function_to_prot  sub assign_function_to_prot
424  {  {
# Line 164  Line 445 
445                                $params, $req_bytes);                                $params, $req_bytes);
446  }  }
447    
448    =head3 call_genes
449    
450        my $document = $ffObject->call_genes($input, $genetic_code);
451    
452    Call the protein-encoding genes for the specified DNA sequences. The result will
453    be a multi-sequence FASTA string listing all the proteins found and a hash mapping
454    each gene found to its location string.
455    
456    =over 4
457    
458    =item input
459    
460    Open input handle to a file containing the DNA sequences in FASTA format.
461    
462    =item genetic_code
463    
464    The numeric code for the mapping from DNA to amino acids. The default is C<11>,
465    which is the standard mapping and should be used in almost all cases. A complete
466    list of mapping codes can be found at
467    L<http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi>.
468    
469    =item RETURN
470    
471    Returns a 2-tuple consisting of a FASTA string for all the proteins found
472    followed by a reference to a list of genes found. Each gene found will be
473    represented by a 4-tuple containing an ID for the gene, the ID of the contig
474    containing it, the starting offset, and the ending offset.
475    
476    =back
477    
478    =cut
479    
480  sub call_genes  sub call_genes
481  {  {
482      my($self, $input, $genetic_code) = @_;      my($self, $input, $genetic_code) = @_;
# Line 193  Line 506 
506                                    id_seq => $input]);                                    id_seq => $input]);
507  }  }
508    
509    =head3 find_rnas
510    
511        my $document = $ffObject->find_rnas($input, $genus, $species, $domain);
512    
513    Call the RNAs for the specified DNA sequences. The result will be a
514    multi-sequence FASTA string listing all the RNAs found and a hash mapping
515    each RNA to its location string.
516    
517    =over 4
518    
519    =item input
520    
521    Open input handle to a file containing the DNA sequences in FASTA format.
522    
523    =item genus
524    
525    Common name of the genus for this DNA.
526    
527    =item species
528    
529    Common name of the species for this DNA.
530    
531    =item domain
532    
533    Domain of this DNA. The default is C<Bacteria>.
534    
535    =item RETURN
536    
537    Returns a 2-tuple consisting of a FASTA string for all the RNAs found
538    followed by reference to a list of RNAs found. Each RNA will be represented by
539    a 4-tuple consisting of an ID for the RNA, the ID of the contig containing it, its
540    starting offset, and its ending offset.
541    
542    =back
543    
544    =cut
545    
546  sub find_rnas  sub find_rnas
547  {  {
548      my($self, $input, $genus, $species, $domain) = @_;      my($self, $input, $genus, $species, $domain) = @_;
# Line 224  Line 574 
574                                    id_seq => $input]);                                    id_seq => $input]);
575  }  }
576    
577    =head3 assign_functions_to_DNA
578    
579        my $document = $ffObject->assign_functions_to_DNA($input, $blast, $min_hits, $max_gap);
580    
581    Analyze DNA sequences and output regions that probably belong to FIGfams.
582    The selected regions will be high-probability candidates for protein
583    production.
584    
585    =over 4
586    
587    =item input
588    
589    Either (1) an open input handle to a file containing the DNA sequences in FASTA format,
590    or (2) a reference to a list of FASTA strings for the DNA sequences.
591    
592    =item blast
593    
594    If nonzero, then when a protein is placed into a FIGfam, a BLAST will be performed
595    afterward, and the top I<N> hits (where I<N> is the value of this parameter)
596    will be returned as part of each protein's output tuple.
597    
598    =item min_hits
599    
600    A number from 1 to 10, indicating the minimum number of matches required to
601    consider a protein as a candidate for assignment to a FIGfam. A higher value
602    indicates a more reliable matching algorithm; the default is C<3>.
603    
604    =item max_gap
605    
606    When looking for a match, if two sequence elements match and are closer than
607    this distance, then they will be considered part of a single match. Otherwise,
608    the match will be split. The default is C<600>.
609    
610    =item RETURN
611    
612    Returns a Result Handler. Call C<get_next> on the result handler to get back a data
613    item. Each item sent back by the result handler is a 2-tuple containing the
614    incoming protein sequence and a reference to a list of hit regions. Each hit
615    region is a 6-tuple consisting of the number of matches to the FIGfam, the start
616    location, the stop location, the proposed functional assignment, the name of the
617    Genome Set from which the gene is likely to have originated, and a list of BLAST
618    hits. If the I<blast> option is not specified, the list of BLAST hits will be
619    empty.
620    
621    =back
622    
623    =cut
624    
625  sub assign_functions_to_dna  sub assign_functions_to_dna
626  {  {
627      my($self, $input, $min_hits, $max_gap, $blast) = @_;      my($self, $input, $min_hits, $max_gap, $blast) = @_;
# Line 250  Line 648 
648                                \&tab_delimited_output_parser, $params, $req_bytes);                                \&tab_delimited_output_parser, $params, $req_bytes);
649  }  }
650    
651    ###### Utility Methods ######
652    
653    sub run_query
654    {
655        my($self, $function, @args ) = @_;
656        my $form = [function  => $function,
657                    args => YAML::Dump(\@args),
658                    ];
659        return $self->run_query_form($form);
660    }
661    
662    sub run_query_form
663    {
664        my($self, $form, $raw) = @_;
665    
666        my $res = $self->{ua}->post($self->{server_url}, $form);
667    
668        if ($res->is_success)
669        {
670            my $content = $res->content;
671            if ($raw)
672            {
673                return $content;
674            }
675    
676    #       print "Got $content\n";
677            my $ret;
678            eval {
679                $ret = Load($content);
680            };
681            if ($@)
682            {
683                die "Query returned unparsable content ($@): " . $content;
684            }
685            return $ret;
686        }
687        else
688        {
689            die "error on post " . $res->status_line . " " . $res->content;
690        }
691    }
692    
693  sub id_seq_pair_bundler  sub id_seq_pair_bundler
694  {  {
695      my($item) = @_;      my($item) = @_;

Legend:
Removed from v.1.19  
changed lines
  Added in v.1.20

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3