[Bio] / FigWebServices / rest_seed.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/rest_seed.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download)

1 : redwards 1.1 #__perl__
2 :    
3 :     # NOTE THAT use strict will break this!!
4 :     use lib '/home/redwards/perl/lib/perl5/site_perl/5.8.7/i686-linux/';
5 :     use CGI;
6 :     use CGI::Carp qw/fatalsToBrowser/;
7 :     use JSON::XS;
8 :    
9 :     use FIG;
10 :     use FIG_Config;
11 :     use Data::Dumper;
12 :     use FigFams;
13 :     use FigFam;
14 :     use IPC::Open3;
15 :     use PinnedRegions;
16 :     use URI::Escape;
17 :    
18 :    
19 :     =pod
20 :    
21 :     =head1 rest_seed.cgi
22 :    
23 :     YAWS - Yet another web service!
24 :    
25 :     Why: We're using rpc encoding which is basically URL encoding. In this, I call something like
26 :    
27 :     http://bioseed.mcs.anl.gov/~redwards/FIG/rest_seed.cgi/multiply/2/3/4/5
28 :    
29 :     and get a response. Why do we need another web service? Mainly because of the Google work. Google pretty much exclusively deals with http requests, and eschews SOAP and other encodings as being too complex.
30 :    
31 :     The data returned is all in JSON format (http://www.json.org/) which is the Javascript object notation format. JSON is a really light weight markup language that cna handle complex objects quite easily.
32 :    
33 :     I am also aiming for lightweight code. In this case, we're not going to instantiate anything until we need it. Hopefully.
34 :    
35 :     NOTE: I can't figure out how to encode forward slashes in URLs. We cant escape them with two slashes (\\) since the browser concatenates them, and we cant encode them with url_encode cos the server craps out. For now I am using the regexp forwardslash (case insensitive)
36 :    
37 :     =cut
38 :    
39 :     # a really simple RESTful web service that returns seed data
40 :    
41 :     my $cgi=new CGI qw/:standard/;
42 :     my $json= new JSON::XS;
43 :    
44 :     print $cgi->header('text/plain');
45 :    
46 :     # get the query with path so we get the RESTful information
47 :     my $abs = $cgi->url(-absolute=>1); # this will be the cgi-bin/rest.cgi stuff
48 :     my $rest = $cgi->url(-path_info=>1);
49 :     $rest =~ s/^.*$abs\///;
50 :    
51 :     # we need to escape double slashes so that we don't split on them
52 :    
53 :     my @rest=split m#/#, $rest;
54 :    
55 :     #my @rest=split m#(?<!/)/(?!/)#, $rest; # this matches a '/' that is neither preceeded nor followed by a '/', so '//' does not match
56 :    
57 :     map {$rest[$_] =~ s#forwardslash#/#gi} (0 .. $#rest);
58 :    
59 :     my $method = shift @rest;
60 :    
61 :     # there is no good way of passing a null value!!
62 :     map {undef $rest[$_] if ($rest[$_] eq "undef" || $rest[$_] eq "null")} (0..$#rest);
63 :    
64 :     #my $result = $json->encode({url=>$rest, query=>\@rest, result => &{$method}(@rest) });
65 :     my $result = $json->encode({result => &{$method}(@rest) });
66 :    
67 :     print $result, "\n";
68 :    
69 :    
70 :    
71 :     =pod
72 :    
73 :     =head1 multiply.
74 :    
75 :     This is a really simple method that just multiplies two numbers! It's great for testing things out
76 :    
77 :     =cut
78 :    
79 :     sub multiply {
80 :    
81 :     my $x = 1;
82 :     map {$x = $x * $_} @_;
83 :     return $x;
84 :     }
85 :    
86 :     =head1 returnNumbers
87 :    
88 :     And this method returns a reference to an array, so you can test JSON parsing
89 :    
90 :     =cut
91 :    
92 :     sub returnNumbers {
93 :     return [21, 23];
94 :     }
95 :    
96 :    
97 :    
98 :    
99 :     =begin WSDL
100 :     _IN alias $string
101 :     _RETURN $string
102 :     _DOC Retrieve the protein sequence for a given identifier. Input is an alias, output is a sequence
103 :     =cut
104 :     sub ali_to_seq {
105 :     my ($arg) = @_;
106 :     return stdin_caller("ali_to_seq", $arg);
107 :     }
108 :    
109 :     =begin WSDL
110 :     _IN pegs $string
111 :     _RETURN $string
112 :     _DOC Retrieve the set of pegs in order along the chromosome. Input is a comma separated list of pegs, and output is the pegs in order along the genome.
113 :     =cut
114 :     sub adjacent {
115 :     my ($arg) = @_;
116 :     $arg =~ s/\,\s*/\n/g;
117 :     return stdin_caller("adjacent", $arg);
118 :     }
119 :    
120 :     =begin WSDL
121 :     _IN peg $string
122 :     _RETURN $string
123 :     _DOC Get the clusters for a peg by bidirectional best hits. Input is a peg, output is two column table of [peg, cluster]
124 :     =cut
125 :     sub cluster_by_bbhs {
126 :     my ($arg) = @_;
127 :     return stdin_caller("cluster_by_bbhs", $arg);
128 :     }
129 :    
130 :     =begin WSDL
131 :     _IN peg $string
132 :     _RETURN $string
133 :     _DOC Get the clusters for a peg by similarity. Input is a peg, output is two column table of [peg, cluster]
134 :     =cut
135 :     sub cluster_by_sim {
136 :     my ($arg) = @_;
137 :     return stdin_caller("cluster_by_sim", $arg);
138 :     }
139 :    
140 :     =begin WSDL
141 :     _IN peg $string
142 :     _RETURN $string
143 :     _DOC Get the annotations for a peg from all other known sources. Input is a peg, output is two column table of [peg, other function]
144 :     =cut
145 :     sub external_calls {
146 :     my ($arg) = @_;
147 :     return stdin_caller("external_calls", $arg);
148 :     }
149 :    
150 :     =begin WSDL
151 :     _IN peg $string
152 :     _RETURN $string
153 :     _DOC Get the functional annotation of a given protein identifier. Input is a peg, output is a function
154 :     =cut
155 :     sub function_of {
156 :     my ($arg) = @_;
157 :     my $fig=new FIG;
158 :     return scalar($fig->function_of($arg));
159 :     }
160 :    
161 :     =begin WSDL
162 :     _IN peg $string
163 :     _RETURN $string
164 :     _DOC Get the genome(s) that a given protein identifier refers to. Input is a peg, output is a single column table of genomes
165 :     =cut
166 :     sub genomes_of {
167 :     my ($arg) = @_;
168 :     return stdin_caller("genomes_of", $arg);
169 :     }
170 :    
171 :    
172 :     =begin WSDL
173 :     _IN genomeid $string
174 :     _RETURN $string
175 :     _DOC Get the genus and species of a genome identifier. Input is a genome ID, output is the genus and species of the genome
176 :     =cut
177 :     sub genus_species {
178 :     my ($arg) = @_;
179 :     my $fig=new FIG;
180 :     return $fig->genus_species($arg);
181 :     }
182 :    
183 :    
184 :     =begin WSDL
185 :     _IN peg $string
186 :     _RETURN $string
187 :     _DOC Get the DNA sequence for a given protein identifier. Input is a peg, output is the DNA sequence in fasta format.
188 :     =cut
189 :     sub fid2dna {
190 :     my ($arg) = @_;
191 :     return stdin_caller("fid2dna", $arg);
192 :     }
193 :    
194 :     =begin WSDL
195 :     _IN peg $string
196 :     _RETURN $string
197 :     _DOC Get the DNA sequence for a set of protein identifiers. Input is a comma-joined list of pegs, output is the DNA sequence in fasta format.
198 :     =cut
199 :     sub fids2dna {
200 :     my ($arg) = @_;
201 :     my $seq;
202 :     foreach my $peg (split /\,/, $arg) {
203 :     $seq .= stdin_caller("fid2dna", $peg);
204 :     }
205 :     return $seq;
206 :     }
207 :    
208 :    
209 :     =begin WSDL
210 :     _IN genomeid $string
211 :     _RETURN @string
212 :     _DOC Get a comma-separated list of all the contigs in a genome
213 :     =cut
214 :     sub contigs_of {
215 :     my ($genome)=@_;
216 :     my $fig = new FIG;
217 :     return join(",", $fig->contigs_of($genome));
218 :     }
219 :    
220 :    
221 :     =begin WSDL
222 :     _IN genomeid $string
223 :     _IN location1 $string
224 :     _IN location2 $string
225 :     _RETURN @string
226 :     _DOC Get the DNA sequence for a region in a genome. Input is a genome ID and one or more locations in the form contig_start_stop, output is the DNA sequence in fasta format.
227 :     =cut
228 :     sub dna_sequence {
229 :     my ($genome, @locations)=@_;
230 :     my $fig = new FIG;
231 :     my $seq=$fig->dna_seq($genome,@locations);
232 :     return $seq;
233 :     }
234 :    
235 :    
236 :    
237 :     =begin WSDL
238 :     _IN genomeid $string
239 :     _RETURN $string
240 :     _DOC Get all the protein identifiers associated with a genome. Input is a genome id, output is a list of pegs in that genome
241 :     =cut
242 :     sub pegs_of {
243 :     my ($arg) = @_;
244 :     my $fig = new FIG;
245 :     return (join ",", $fig->pegs_of($arg));
246 :     }
247 :    
248 :     =begin WSDL
249 :     _IN genomeid $string
250 :     _IN contig $string
251 :     _RETURN $string
252 :     _DOC Get the length of the DNA sequence in a contig in a genome. Input is a genome id and a contig name, return is the length of the contig
253 :     =cut
254 :     sub contig_ln {
255 :     my $fig = new FIG;
256 :     return $fig->contig_ln(@_);
257 :     }
258 :    
259 :     =begin WSDL
260 :     _IN genomeid $string
261 :     _RETURN $string
262 :     _DOC Test whether an organism is Archaeal. Input is a genome identifier, and output is true or false (or 1 or 0)
263 :     =cut
264 :     sub is_archaeal {
265 :     my ($arg) = @_;
266 :     return stdin_caller("is_archaeal", $arg);
267 :     }
268 :    
269 :     =begin WSDL
270 :     _IN genomeid $string
271 :     _RETURN $string
272 :     _DOC Test whether an organism is Bacterial. Input is a genome identifier, and output is true or false (or 1 or 0)
273 :     =cut
274 :     sub is_bacterial {
275 :     my ($arg) = @_;
276 :     return stdin_caller("is_bacterial", $arg);
277 :     }
278 :    
279 :     =begin WSDL
280 :     _IN genomeid $string
281 :     _RETURN $string
282 :     _DOC Test whether an organism is Eukaryotic. Input is a genome identifier, and output is true or false (or 1 or 0)
283 :     =cut
284 :     sub is_eukaryotic {
285 :     my ($arg) = @_;
286 :     return stdin_caller("is_eukaryotic", $arg);
287 :     }
288 :    
289 :     =begin WSDL
290 :     _IN genomeid $string
291 :     _RETURN $string
292 :     _DOC Test whether an organism is a Prokaryote. Input is a genome identifier, and output is true or false (or 1 or 0)
293 :     =cut
294 :     sub is_prokaryotic {
295 :     my ($arg) = @_;
296 :     return stdin_caller("is_prokaryotic", $arg);
297 :     }
298 :    
299 :     =begin WSDL
300 :     _IN peg $string
301 :     _RETURN $string
302 :     _DOC Get the translation (protein sequence) of a peg. Input is a peg, output is the protein sequence
303 :     =cut
304 :     sub translation_of {
305 :     my ($arg) = @_;
306 :     return stdin_caller("translation_of", $arg);
307 :     }
308 :    
309 :     =begin WSDL
310 :     _IN peg $string
311 :     _RETURN $string
312 :     _DOC Get the translation (protein sequence) of a peg. Input is a peg, output is translation
313 :     =cut
314 :     sub get_translation {
315 :     my ($arg1) = @_;
316 :     my $fig = new FIG;
317 :     my $result = $fig->get_translation($arg1);
318 :     return $result;
319 :     }
320 :    
321 :     =begin WSDL
322 :     _IN peg $string
323 :     _RETURN @string
324 :     _DOC Get the location of a peg on its contig. Input is a peg, output is list of loc on contig
325 :     =cut
326 :     sub feature_location {
327 :     my ($arg1) = @_;
328 :     my $fig = new FIG;
329 :     my @result = ($fig->feature_location($arg1));
330 :     return @result;
331 :     }
332 :    
333 :     =begin WSDL
334 :     _IN peg $string
335 :     _RETURN $string
336 :     _DOC Get the aliases of a peg. These are the identifiers that other databases use. Input is a peg, output is tab separated string of aliases
337 :     =cut
338 :     sub aliases_of {
339 :     my ($arg) = @_;
340 :     return stdin_caller("aliases_of", $arg);
341 :     }
342 :    
343 :     =begin WSDL
344 :     _IN peg $string
345 :     _RETURN $string
346 :     _DOC Get the corresponding ids of a peg. These are the identifiers that other databases use. Input is a peg, output is tab separated string of aliases
347 :     =cut
348 :     sub get_corresponding_ids {
349 :     my ($arg) = @_;
350 :     my $fig = new FIG;
351 :     my @result = $fig->get_corresponding_ids($arg, 1);
352 :     return join("\t", map {join(":", $_->[1], $_->[0])} @result);
353 :     }
354 :    
355 :     =begin WSDL
356 :     _IN alias $string
357 :     _RETURN $string
358 :     _DOC Get the FIG ID (peg) for a given external identifier. Input is an identifier used by another database, output is our identifier
359 :     =cut
360 :     sub alias2fig {
361 :     my ($arg) = @_;
362 :     return stdin_caller("alias2fig", $arg);
363 :     }
364 :    
365 :    
366 :    
367 :     =begin WSDL
368 :     _IN EC_number $string ec code
369 :     _RETURN $string ec name
370 :     _DOC Get the name for a given E.C. number. Input is an EC number, output is the name
371 :     =cut
372 :     sub ec_name {
373 :     my $fig = new FIG;
374 :     my $result = $fig->ec_name(@_);
375 :     return $result;
376 :     }
377 :    
378 :    
379 :     =begin WSDL
380 :     _IN reaction_number $string reaction code number
381 :     _IN genomeid $string
382 :     _RETURN $string
383 :     _DOC Get a tab-separated list of [subsystem name, functional role, peg, subsystem variant code for that genome] for any given reaction id and genome id. Maps the reaction id to peg, peg to genome, and genome to variant code
384 :     =cut
385 :     sub reaction_to_role {
386 :     my ($rxn, $genomeid) = @_;
387 :     my $fig = new FIG;
388 :     my @ecs = $fig->catalyzed_by($rxn);
389 :    
390 :     my @return;
391 :     foreach my $ec (@ecs)
392 :     {
393 :     my @ssr = grep {$fig->genome_of($_->[2]) eq $genomeid} $fig->subsystems_for_ec($ec);
394 :     foreach my $ss (@ssr)
395 :     {
396 :     my $sub = $fig->get_subsystem($ss->[0]);
397 :     push @$ss, $sub->get_variant_code_for_genome($fig->genome_of($ss->[2])) if ($sub);
398 :     push @return, join("\t", @$ss);
399 :     }
400 :     }
401 :     return SOAP::Data->type('string')->name('reaction_to_roleReturn')->value(join("\n", @return));
402 :     }
403 :    
404 :    
405 :    
406 :     =begin WSDL
407 :     _IN peg $string
408 :     _RETURN $string
409 :     _DOC Get the pegs that are coupled to any given peg. Input is a peg, output is list of [protein, score] for things that are coupled to this peg
410 :     =cut
411 :     sub coupled_to {
412 :     my $fig = new FIG;
413 :     my $return=undef;
414 :     my @result = $fig->coupled_to(@_);
415 :     if (@result)
416 :     {
417 :     $return = join("\n", map {$_->[0].",".$_->[1]} @result);
418 :     }
419 :     return $return;
420 :     }
421 :    
422 :     =begin WSDL
423 :     _IN peg $string
424 :     _RETURN $string
425 :     _DOC Get the pegs that may be coupled to this peg through abstract coupling. Input is a peg, output is list of [protein, score] for things that are coupled to this peg
426 :     =cut
427 :     sub abstract_coupled_to {
428 :     my $fig = new FIG;
429 :     my $return=undef;
430 :     my @result = $fig->abstract_coupled_to(@_);
431 :     if (@result)
432 :     {
433 :     $return = join("\n", map {$_->[0].",".$_->[1]} @result);
434 :     }
435 :     return $return;
436 :     }
437 :    
438 :     =begin WSDL
439 :     _IN peg_id $string
440 :     _IN n_pch_pins $string
441 :     _IN n_sims $string
442 :     _IN sim_cutoff $string
443 :     _IN color_sim_cutoff $string
444 :     _IN sort_by $string
445 :     _RETURN $string
446 :     _DOC Input is a FIG (PEG) ID and ..., output is the pinned regions data
447 :     =cut
448 :     sub pinned_region_data {
449 :     my ($peg, $n_pch_pins, $n_sims, $sim_cutoff, $color_sim_cutoff, $sort_by, $fast_color, $sims_from, $region_size) = @_;
450 :    
451 :     my $fig = new FIG;
452 :    
453 :     defined($n_pch_pins) or $n_pch_pins = 5;
454 :     defined($n_sims) or $n_sims = 0;
455 :     defined($sim_cutoff) or $sim_cutoff = 1e-20;
456 :     defined($color_sim_cutoff) or $color_sim_cutoff = 1e-20;
457 :     defined($sort_by) or $sort_by = '';
458 :    
459 :     defined($fast_color) or $fast_color = 0;
460 :     defined($sims_from) or $sims_from = 'blast';
461 :     defined($region_size) or $region_size = 16000;
462 :    
463 :     my $pin_desc = {
464 :     'pegs' => [$peg],
465 :     'collapse_close_genomes' => 0,
466 :     'n_pch_pins' => $n_pch_pins,
467 :     'n_sims' => $n_sims,
468 :     'show_genomes' => '',
469 :     'sim_cutoff' => $sim_cutoff,
470 :     'color_sim_cutoff' => $color_sim_cutoff,
471 :     'sort_by' => $sort_by,
472 :     'show_genomes' => [],
473 :     };
474 :    
475 :     my $maps = &PinnedRegions::pinned_regions($fig, $pin_desc, $fast_color, $sims_from, $region_size);
476 :     my $txt = Dumper($maps);
477 :     return $txt;
478 :     }
479 :    
480 :    
481 :     =begin WSDL
482 :     _IN complete $string
483 :     _IN restrictions $string
484 :     _IN domain $string
485 :     _RETURN @string
486 :     _DOC Get a set of genomes. The inputs are a series of constraints - whether the sequence is complete, other restrictions, and a domain of life (Bacteria, Archaea, Eukarya, Viral, Environmental Genome). Output is a comma separated list of genomes
487 :     =cut
488 :     sub all_genomes {
489 :     my $fig=new FIG;
490 :     my @genomes=$fig->genomes(@_);
491 :     return \@genomes;
492 :     }
493 :    
494 :     =begin WSDL
495 :     _IN complete $string
496 :     _IN restrictions $string
497 :     _IN domain $string
498 :     _RETURN @string
499 :     _DOC Get a set of genomes. The inputs are a series of constraints - whether the sequence is complete, other restrictions, and a domain of life (Bacteria, Archaea, Eukarya, Viral, Environmental Genome). Output is a comma separated list of genomes.
500 :     =cut
501 :     sub genomes {
502 :     my $fig = new FIG;
503 :     my @result = $fig->genomes(@_);
504 :     my %genomes = map {($_ => $fig->genus_species($_))} $fig->genomes(@_);
505 :    
506 :     return \%genomes;
507 :     }
508 :    
509 :    
510 :     =begin WSDL
511 :     _IN pattern1 $string
512 :     _IN pattern2 $string
513 :     _RETURN @string
514 :     _DOC Search and grep through the database. Input is two patterns, first one is used in search_index, second used to grep the results to restrict to a smaller set.
515 :     =cut
516 :     sub search_and_grep {
517 :     my ($arg1, $arg2) = @_;
518 :    
519 :     my $fig = new FIG;
520 :    
521 :     my ($pegs, $roles) = $fig->search_index($arg1);
522 :    
523 :     my (@result_list, $entry);
524 :    
525 :     for $entry (@$pegs) {
526 :     push (@result_list, grep(/$arg2/, @$entry));
527 :     }
528 :     push (@result_list, grep(/$arg2/, @$roles));
529 :     chomp @result_list;
530 :     my $return_value = join ("\n", @result_list);
531 :     return $return_value;
532 :     }
533 :    
534 :    
535 :    
536 :     sub search_genome {
537 :     my ($genome, $term)=@_;
538 :     my $fig = new FIG;
539 :     my ($pegs, $roles) = $fig->search_index($term);
540 :     my $result;
541 :     # For each feature, there is a tuple consisting of the (0) feature ID, (1) the organism name (genus and species), (2) the aliases, (3) the functional role, and (4) the relevant annotator
542 :    
543 :     foreach my $ent (grep {$_->[0] =~ /fig\|$genome\./} @$pegs) {
544 :     $result->{$ent->[0]}=$ent->[3];
545 :     }
546 :     return $result;
547 :     }
548 :    
549 :     =begin WSDL
550 :     _IN pattern $string
551 :     _RETURN @string
552 :     _DOC Search the database. Input is a pattern to search for, output is tab separated list of pegs and roles
553 :     =cut
554 :     sub simple_search {
555 :     my ($arg1)=@_;
556 :    
557 :     my $fig = new FIG;
558 :    
559 :     my ($pegs, $roles) = $fig->search_index($arg1);
560 :    
561 :     my (@result_list, $entry);
562 :    
563 :     for $entry (@$pegs) {
564 :     push (@result_list, (join("\t", @$entry)));
565 :     }
566 :    
567 :     # push (@result_list, (join("\t", @$roles)));
568 :     chomp @result_list;
569 :     my $return_value = join ("\n", @result_list);
570 :     return $return_value;
571 :     }
572 :    
573 :    
574 :     =begin all_subsystem_classifications
575 :    
576 :     Get all the subsystems classifications. Tested with JSON.
577 :    
578 :     =cut
579 :     sub all_subsystem_classifications {
580 :     my $fig = new FIG;
581 :     my $output;
582 :    
583 :     my %found;
584 :     map {
585 :     my @classification=@{$fig->subsystem_classification($_)};
586 :     $#classification=1;
587 :     push @classification, $_;
588 :     push @$output, \@classification;
589 :     } sort {$a cmp $b} ($fig->all_subsystems());
590 :     return $output;
591 :     }
592 :    
593 :     =begin all_subsystems_with_roles
594 :    
595 :     Get all the subsystems and their roles. Tested with JSON
596 :    
597 :     =cut
598 :     sub all_subsystems_with_roles {
599 :     my $fig = new FIG;
600 :     my $output = $fig->all_subsystems_with_roles();
601 :     return $output;
602 :     }
603 :    
604 :    
605 :     =begin functions_to_subsystems
606 :    
607 :     Get the subsystems for several functions. Tested with JSON
608 :    
609 :     =cut
610 :     sub functions_to_subsystems {
611 :     my $fig=new FIG;
612 :     my %fns;
613 :     foreach my $fn (@_) {
614 :     $fn = uri_unescape($fn);
615 :     my @arr = $fig->function_to_subsystems($fn);
616 :     $fns{$fn} = \@arr;
617 :     }
618 :     return \%fns;
619 :     }
620 :    
621 :     =begin function_to_subsystems
622 :    
623 :     Get the subsystems for a function. Tested with JSON
624 :    
625 :     =cut
626 :     sub function_to_subsystems {
627 :     my $fn = shift;
628 :     $fn =~ s/\%20/ /g;
629 :     my $fig=new FIG;
630 :     my @arr = $fig->function_to_subsystems($fn);
631 :     return \@arr;
632 :     }
633 :    
634 :    
635 :     =begin WSDL
636 :     _RETURN $string list of families
637 :     _DOC Get all the FIG protein families (FIGfams). No input needed, it just returns a list of all families
638 :     =cut
639 :     sub all_families {
640 :     my $fig = new FIG;
641 :     my $figfams = new FigFams($fig);
642 :    
643 :     my @out = $figfams->all_families;
644 :     print STDERR Dumper(@out);
645 :     return @out;
646 :     }
647 :    
648 :     =begin WSDL
649 :     _RETURN $string list of families and funcs
650 :     _DOC Get all the FIG protein families (FIGfams) with their assigned functions. No input needed, it just returns a list of all the families and their functions.
651 :     =cut
652 :     sub all_families_with_funcs {
653 :     my $fig = new FIG;
654 :     my $figfams = new FigFams($fig);
655 :    
656 :     my @out =$figfams->all_families_with_funcs;
657 :     return @out;
658 :     }
659 :    
660 :     =begin WSDL
661 :     _IN families $string list of famids
662 :     _RETURN $string 2 col table, famid, peg
663 :     _DOC Get all the pegs in some FIGfams. The input is a tab-separated list of family IDs, and the output is a two column table of [family id, peg]
664 :     =cut
665 :     sub list_members {
666 :     my ($famids) = @_;
667 :     my $fig = new FIG;
668 :     my $figfams = new FigFams($fig);
669 :     my @in = split(/\t/, $famids);
670 :     warn("Starting 2 list members $famids\n");
671 :     my @out = ();
672 :     foreach my $famid (@in)
673 :     {
674 :     my $famO = new FigFam($fig,$famid);
675 :     foreach my $peg ($famO->list_members)
676 :     {
677 :     push(@out,[$famid,$peg]);
678 :     }
679 :     }
680 :     return @out;
681 :     }
682 :    
683 :     =begin WSDL
684 :     _IN families $string list of pegs
685 :     _RETURN $string returns a 3-column table [PEG,Function,AliasesCommaSeparated]
686 :     _DOC Get all the pegs in some FIGfams, their functions, and aliases. Input is a tab-separated list of families, returns a 3-column comma separated table [PEG, Function, Aliases]
687 :     =cut
688 :     sub CDS_data {
689 :     my ($pegs) = @_;
690 :     my $fig = new FIG;
691 :     my $figfams = new FigFams($fig);
692 :     my @in = split(/\t/, $pegs);
693 :    
694 :     #warn("Starting CDS data $pegs\n");
695 :     #print STDERR &Dumper($pegs);
696 :    
697 :     my @out = ();
698 :     foreach my $peg (@in)
699 :     {
700 :     my @famids = $figfams->families_containing_peg($peg);
701 :     foreach my $famid (@famids)
702 :     {
703 :     push(@out,[$peg,scalar $fig->function_of($peg),[$fig->feature_aliases($peg)]]);
704 :     }
705 :     }
706 :     return @out;
707 :     }
708 :    
709 :     =begin WSDL
710 :     _IN families $string list of pegs
711 :     _RETURN $string a 2-column table [PEG,Sequence]
712 :     _DOC Get the protein sequences for a list of FIGfams. Input is a tab-separated list of families, returns a 2-column comma separated table of [PEG, sequence]
713 :     =cut
714 :     sub CDS_sequences {
715 :     my ($pegs) = @_;
716 :     my $fig = new FIG;
717 :     my $figfams = new FigFams($fig);
718 :     my @in = split(/\t/, $pegs);
719 :    
720 :     #warn("Starting CDS seq $pegs\n");
721 :     #print STDERR &Dumper($pegs);
722 :     my @out = ();
723 :     foreach my $peg (@in)
724 :     {
725 :     push(@out,[$peg,$fig->get_translation($peg)]);
726 :     }
727 :     return @out;
728 :     }
729 :    
730 :     =begin WSDL
731 :     _IN sequences $string list of id seq pairs
732 :     _RETURN $string returns a 2-column table [Id,FamilyID]
733 :     _DOC Tries to put a protein sequence in a family. Input is a tab-separated id and sequence, delimited by new lines. The output is a comma-separated 2-column table [your sequence id, FamilyID] if the sequence is placed in a family.
734 :     =cut
735 :     sub is_member_of {
736 :     my ($id_seqs) = @_;
737 :     my $fig = new FIG;
738 :     my $figfams = new FigFams($fig);
739 :     #warn("Doing is member $id_seqs\n");
740 :     #print STDERR &Dumper($id_seqs);
741 :    
742 :     my @in = split(/\n/, $id_seqs);
743 :     my @out = ();
744 :     foreach my $pair (@in)
745 :     {
746 :     my($id,$seq) = split(/\t/, $pair);
747 :     my($famO,undef) = $figfams->place_in_family($seq);
748 :     if ($famO)
749 :     {
750 :     push(@out,[$id,$famO->family_id]);
751 :     }
752 :     }
753 :     return @out;
754 :     }
755 :    
756 :     =begin WSDL
757 :     _IN peg $string
758 :     _IN maxN $string
759 :     _IN maxP $string
760 :     _RETURN $string
761 :     _DOC Retrieve the sims (precomputed BLAST hits) for a given protein sequence. Input is a peg, an optional maximum number of hits (default=50), and an optional maximum E value (default=1e-5). The output is a list of sims in modified tab separated (-m 8) format. Additional columns include length of query and database sequences, and method used.
762 :     =cut
763 :     sub sims {
764 :     my ($peg, $maxN, $maxP)=@_;
765 :     unless (defined $maxN) {$maxN=50}
766 :     unless (defined $maxP) {$maxP=1e-5}
767 :     my $fig=new FIG;
768 :     my $return=undef;
769 :     foreach my $sim ($fig->sims($peg, $maxN, $maxP, 'figx'))
770 :     {
771 :     $return .= join("\t", @$sim). "\n";
772 :     }
773 :    
774 :     return $return;
775 :     }
776 :    
777 :    
778 :    
779 :     ##### INTERNAL METHODS
780 :    
781 :     sub stdin_caller {
782 :     my ($name, $arg) = @_;
783 :     my($rd, $wr, $err, $pid, $std_err, $return_value, @std_out);
784 :     if (!($pid = open3($wr, $rd, $err, "$FIG_Config::bin/$name")))
785 :     {
786 :     die "Cannot run open3 $name: $!";
787 :     }
788 :    
789 :     $wr->write($arg);
790 :     close($wr);
791 :    
792 :     @std_out= <$rd>;
793 :     close($rd);
794 :     waitpid $pid, 0;
795 :     $return_value = join ("", @std_out);
796 :     return $return_value;
797 :     }
798 :    
799 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3