[Bio] / FigWebServices / rest_seed.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/rest_seed.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download)

1 : redwards 1.1 #__perl__
2 :    
3 :     # NOTE THAT use strict will break this!!
4 :     use lib '/home/redwards/perl/lib/perl5/site_perl/5.8.7/i686-linux/';
5 :     use CGI;
6 :     use CGI::Carp qw/fatalsToBrowser/;
7 :     use JSON::XS;
8 :    
9 :     use FIG;
10 :     use FIG_Config;
11 :     use Data::Dumper;
12 : redwards 1.2 use FFs;
13 :     use FF;
14 : redwards 1.1 use IPC::Open3;
15 :     use PinnedRegions;
16 :     use URI::Escape;
17 :    
18 :    
19 :     =pod
20 :    
21 :     =head1 rest_seed.cgi
22 :    
23 :     YAWS - Yet another web service!
24 :    
25 :     Why: We're using rpc encoding which is basically URL encoding. In this, I call something like
26 :    
27 :     http://bioseed.mcs.anl.gov/~redwards/FIG/rest_seed.cgi/multiply/2/3/4/5
28 :    
29 :     and get a response. Why do we need another web service? Mainly because of the Google work. Google pretty much exclusively deals with http requests, and eschews SOAP and other encodings as being too complex.
30 :    
31 :     The data returned is all in JSON format (http://www.json.org/) which is the Javascript object notation format. JSON is a really light weight markup language that cna handle complex objects quite easily.
32 :    
33 :     I am also aiming for lightweight code. In this case, we're not going to instantiate anything until we need it. Hopefully.
34 :    
35 :     NOTE: I can't figure out how to encode forward slashes in URLs. We cant escape them with two slashes (\\) since the browser concatenates them, and we cant encode them with url_encode cos the server craps out. For now I am using the regexp forwardslash (case insensitive)
36 :    
37 :     =cut
38 :    
39 :     # a really simple RESTful web service that returns seed data
40 :    
41 :     my $cgi=new CGI qw/:standard/;
42 :     my $json= new JSON::XS;
43 :    
44 :     print $cgi->header('text/plain');
45 :    
46 :     # get the query with path so we get the RESTful information
47 :     my $abs = $cgi->url(-absolute=>1); # this will be the cgi-bin/rest.cgi stuff
48 :     my $rest = $cgi->url(-path_info=>1);
49 : redwards 1.3 $rest =~ s/^.*$abs\/*//;
50 : redwards 1.1
51 :     # we need to escape double slashes so that we don't split on them
52 :    
53 :     my @rest=split m#/#, $rest;
54 :    
55 :     #my @rest=split m#(?<!/)/(?!/)#, $rest; # this matches a '/' that is neither preceeded nor followed by a '/', so '//' does not match
56 :    
57 :     map {$rest[$_] =~ s#forwardslash#/#gi} (0 .. $#rest);
58 :    
59 :     my $method = shift @rest;
60 :    
61 : redwards 1.3 if ($method =~ /^http/) {
62 :     print <<EOF;
63 :     THERE IS AN ERROR PARSING THE URL:
64 :     Absolute: $abs
65 :     Rest: $rest
66 :    
67 :     EOF
68 :     exit 0;
69 :     }
70 :    
71 :    
72 :     if (!$method) {
73 :     print "{'result' : 'undef'}\n";
74 :     exit 0;
75 :     }
76 :    
77 : redwards 1.1 # there is no good way of passing a null value!!
78 :     map {undef $rest[$_] if ($rest[$_] eq "undef" || $rest[$_] eq "null")} (0..$#rest);
79 :    
80 :     #my $result = $json->encode({url=>$rest, query=>\@rest, result => &{$method}(@rest) });
81 :     my $result = $json->encode({result => &{$method}(@rest) });
82 :    
83 :     print $result, "\n";
84 :    
85 :    
86 :    
87 :     =pod
88 :    
89 :     =head1 multiply.
90 :    
91 :     This is a really simple method that just multiplies two numbers! It's great for testing things out
92 :    
93 :     =cut
94 :    
95 :     sub multiply {
96 :    
97 :     my $x = 1;
98 :     map {$x = $x * $_} @_;
99 :     return $x;
100 :     }
101 :    
102 :     =head1 returnNumbers
103 :    
104 :     And this method returns a reference to an array, so you can test JSON parsing
105 :    
106 :     =cut
107 :    
108 :     sub returnNumbers {
109 :     return [21, 23];
110 :     }
111 :    
112 :    
113 :    
114 :    
115 :     =begin WSDL
116 :     _IN alias $string
117 :     _RETURN $string
118 :     _DOC Retrieve the protein sequence for a given identifier. Input is an alias, output is a sequence
119 :     =cut
120 :     sub ali_to_seq {
121 :     my ($arg) = @_;
122 :     return stdin_caller("ali_to_seq", $arg);
123 :     }
124 :    
125 :     =begin WSDL
126 :     _IN pegs $string
127 :     _RETURN $string
128 :     _DOC Retrieve the set of pegs in order along the chromosome. Input is a comma separated list of pegs, and output is the pegs in order along the genome.
129 :     =cut
130 :     sub adjacent {
131 :     my ($arg) = @_;
132 :     $arg =~ s/\,\s*/\n/g;
133 :     return stdin_caller("adjacent", $arg);
134 :     }
135 :    
136 :     =begin WSDL
137 :     _IN peg $string
138 :     _RETURN $string
139 :     _DOC Get the clusters for a peg by bidirectional best hits. Input is a peg, output is two column table of [peg, cluster]
140 :     =cut
141 :     sub cluster_by_bbhs {
142 :     my ($arg) = @_;
143 :     return stdin_caller("cluster_by_bbhs", $arg);
144 :     }
145 :    
146 :     =begin WSDL
147 :     _IN peg $string
148 :     _RETURN $string
149 :     _DOC Get the clusters for a peg by similarity. Input is a peg, output is two column table of [peg, cluster]
150 :     =cut
151 :     sub cluster_by_sim {
152 :     my ($arg) = @_;
153 :     return stdin_caller("cluster_by_sim", $arg);
154 :     }
155 :    
156 :     =begin WSDL
157 :     _IN peg $string
158 :     _RETURN $string
159 :     _DOC Get the annotations for a peg from all other known sources. Input is a peg, output is two column table of [peg, other function]
160 :     =cut
161 :     sub external_calls {
162 :     my ($arg) = @_;
163 :     return stdin_caller("external_calls", $arg);
164 :     }
165 :    
166 :     =begin WSDL
167 :     _IN peg $string
168 :     _RETURN $string
169 :     _DOC Get the functional annotation of a given protein identifier. Input is a peg, output is a function
170 :     =cut
171 :     sub function_of {
172 :     my ($arg) = @_;
173 :     my $fig=new FIG;
174 :     return scalar($fig->function_of($arg));
175 :     }
176 :    
177 :     =begin WSDL
178 :     _IN peg $string
179 :     _RETURN $string
180 :     _DOC Get the genome(s) that a given protein identifier refers to. Input is a peg, output is a single column table of genomes
181 :     =cut
182 :     sub genomes_of {
183 :     my ($arg) = @_;
184 :     return stdin_caller("genomes_of", $arg);
185 :     }
186 :    
187 :    
188 :     =begin WSDL
189 :     _IN genomeid $string
190 :     _RETURN $string
191 :     _DOC Get the genus and species of a genome identifier. Input is a genome ID, output is the genus and species of the genome
192 :     =cut
193 :     sub genus_species {
194 :     my ($arg) = @_;
195 :     my $fig=new FIG;
196 :     return $fig->genus_species($arg);
197 :     }
198 :    
199 :    
200 :     =begin WSDL
201 :     _IN peg $string
202 :     _RETURN $string
203 :     _DOC Get the DNA sequence for a given protein identifier. Input is a peg, output is the DNA sequence in fasta format.
204 :     =cut
205 :     sub fid2dna {
206 :     my ($arg) = @_;
207 :     return stdin_caller("fid2dna", $arg);
208 :     }
209 :    
210 :     =begin WSDL
211 :     _IN peg $string
212 :     _RETURN $string
213 :     _DOC Get the DNA sequence for a set of protein identifiers. Input is a comma-joined list of pegs, output is the DNA sequence in fasta format.
214 :     =cut
215 :     sub fids2dna {
216 :     my ($arg) = @_;
217 :     my $seq;
218 :     foreach my $peg (split /\,/, $arg) {
219 :     $seq .= stdin_caller("fid2dna", $peg);
220 :     }
221 :     return $seq;
222 :     }
223 :    
224 :    
225 :     =begin WSDL
226 :     _IN genomeid $string
227 :     _RETURN @string
228 :     _DOC Get a comma-separated list of all the contigs in a genome
229 :     =cut
230 :     sub contigs_of {
231 :     my ($genome)=@_;
232 :     my $fig = new FIG;
233 :     return join(",", $fig->contigs_of($genome));
234 :     }
235 :    
236 :    
237 :     =begin WSDL
238 :     _IN genomeid $string
239 :     _IN location1 $string
240 :     _IN location2 $string
241 :     _RETURN @string
242 :     _DOC Get the DNA sequence for a region in a genome. Input is a genome ID and one or more locations in the form contig_start_stop, output is the DNA sequence in fasta format.
243 :     =cut
244 :     sub dna_sequence {
245 :     my ($genome, @locations)=@_;
246 :     my $fig = new FIG;
247 :     my $seq=$fig->dna_seq($genome,@locations);
248 :     return $seq;
249 :     }
250 :    
251 :    
252 :    
253 :     =begin WSDL
254 :     _IN genomeid $string
255 :     _RETURN $string
256 :     _DOC Get all the protein identifiers associated with a genome. Input is a genome id, output is a list of pegs in that genome
257 :     =cut
258 :     sub pegs_of {
259 :     my ($arg) = @_;
260 :     my $fig = new FIG;
261 :     return (join ",", $fig->pegs_of($arg));
262 :     }
263 :    
264 :     =begin WSDL
265 :     _IN genomeid $string
266 :     _IN contig $string
267 :     _RETURN $string
268 :     _DOC Get the length of the DNA sequence in a contig in a genome. Input is a genome id and a contig name, return is the length of the contig
269 :     =cut
270 :     sub contig_ln {
271 :     my $fig = new FIG;
272 :     return $fig->contig_ln(@_);
273 :     }
274 :    
275 :     =begin WSDL
276 :     _IN genomeid $string
277 :     _RETURN $string
278 :     _DOC Test whether an organism is Archaeal. Input is a genome identifier, and output is true or false (or 1 or 0)
279 :     =cut
280 :     sub is_archaeal {
281 :     my ($arg) = @_;
282 :     return stdin_caller("is_archaeal", $arg);
283 :     }
284 :    
285 :     =begin WSDL
286 :     _IN genomeid $string
287 :     _RETURN $string
288 :     _DOC Test whether an organism is Bacterial. Input is a genome identifier, and output is true or false (or 1 or 0)
289 :     =cut
290 :     sub is_bacterial {
291 :     my ($arg) = @_;
292 :     return stdin_caller("is_bacterial", $arg);
293 :     }
294 :    
295 :     =begin WSDL
296 :     _IN genomeid $string
297 :     _RETURN $string
298 :     _DOC Test whether an organism is Eukaryotic. Input is a genome identifier, and output is true or false (or 1 or 0)
299 :     =cut
300 :     sub is_eukaryotic {
301 :     my ($arg) = @_;
302 :     return stdin_caller("is_eukaryotic", $arg);
303 :     }
304 :    
305 :     =begin WSDL
306 :     _IN genomeid $string
307 :     _RETURN $string
308 :     _DOC Test whether an organism is a Prokaryote. Input is a genome identifier, and output is true or false (or 1 or 0)
309 :     =cut
310 :     sub is_prokaryotic {
311 :     my ($arg) = @_;
312 :     return stdin_caller("is_prokaryotic", $arg);
313 :     }
314 :    
315 :     =begin WSDL
316 :     _IN peg $string
317 :     _RETURN $string
318 :     _DOC Get the translation (protein sequence) of a peg. Input is a peg, output is the protein sequence
319 :     =cut
320 :     sub translation_of {
321 :     my ($arg) = @_;
322 :     return stdin_caller("translation_of", $arg);
323 :     }
324 :    
325 :     =begin WSDL
326 :     _IN peg $string
327 :     _RETURN $string
328 :     _DOC Get the translation (protein sequence) of a peg. Input is a peg, output is translation
329 :     =cut
330 :     sub get_translation {
331 :     my ($arg1) = @_;
332 :     my $fig = new FIG;
333 :     my $result = $fig->get_translation($arg1);
334 :     return $result;
335 :     }
336 :    
337 :     =begin WSDL
338 :     _IN peg $string
339 :     _RETURN @string
340 :     _DOC Get the location of a peg on its contig. Input is a peg, output is list of loc on contig
341 :     =cut
342 :     sub feature_location {
343 :     my ($arg1) = @_;
344 :     my $fig = new FIG;
345 :     my @result = ($fig->feature_location($arg1));
346 :     return @result;
347 :     }
348 :    
349 :     =begin WSDL
350 :     _IN peg $string
351 :     _RETURN $string
352 :     _DOC Get the aliases of a peg. These are the identifiers that other databases use. Input is a peg, output is tab separated string of aliases
353 :     =cut
354 :     sub aliases_of {
355 :     my ($arg) = @_;
356 :     return stdin_caller("aliases_of", $arg);
357 :     }
358 :    
359 :     =begin WSDL
360 :     _IN peg $string
361 :     _RETURN $string
362 :     _DOC Get the corresponding ids of a peg. These are the identifiers that other databases use. Input is a peg, output is tab separated string of aliases
363 :     =cut
364 :     sub get_corresponding_ids {
365 :     my ($arg) = @_;
366 :     my $fig = new FIG;
367 :     my @result = $fig->get_corresponding_ids($arg, 1);
368 :     return join("\t", map {join(":", $_->[1], $_->[0])} @result);
369 :     }
370 :    
371 :     =begin WSDL
372 :     _IN alias $string
373 :     _RETURN $string
374 :     _DOC Get the FIG ID (peg) for a given external identifier. Input is an identifier used by another database, output is our identifier
375 :     =cut
376 :     sub alias2fig {
377 :     my ($arg) = @_;
378 :     return stdin_caller("alias2fig", $arg);
379 :     }
380 :    
381 :    
382 :    
383 :     =begin WSDL
384 :     _IN EC_number $string ec code
385 :     _RETURN $string ec name
386 :     _DOC Get the name for a given E.C. number. Input is an EC number, output is the name
387 :     =cut
388 :     sub ec_name {
389 :     my $fig = new FIG;
390 :     my $result = $fig->ec_name(@_);
391 :     return $result;
392 :     }
393 :    
394 :    
395 :     =begin WSDL
396 :     _IN reaction_number $string reaction code number
397 :     _IN genomeid $string
398 :     _RETURN $string
399 :     _DOC Get a tab-separated list of [subsystem name, functional role, peg, subsystem variant code for that genome] for any given reaction id and genome id. Maps the reaction id to peg, peg to genome, and genome to variant code
400 :     =cut
401 :     sub reaction_to_role {
402 :     my ($rxn, $genomeid) = @_;
403 :     my $fig = new FIG;
404 :     my @ecs = $fig->catalyzed_by($rxn);
405 :    
406 :     my @return;
407 :     foreach my $ec (@ecs)
408 :     {
409 :     my @ssr = grep {$fig->genome_of($_->[2]) eq $genomeid} $fig->subsystems_for_ec($ec);
410 :     foreach my $ss (@ssr)
411 :     {
412 :     my $sub = $fig->get_subsystem($ss->[0]);
413 :     push @$ss, $sub->get_variant_code_for_genome($fig->genome_of($ss->[2])) if ($sub);
414 :     push @return, join("\t", @$ss);
415 :     }
416 :     }
417 :     return SOAP::Data->type('string')->name('reaction_to_roleReturn')->value(join("\n", @return));
418 :     }
419 :    
420 :    
421 :    
422 :     =begin WSDL
423 :     _IN peg $string
424 :     _RETURN $string
425 :     _DOC Get the pegs that are coupled to any given peg. Input is a peg, output is list of [protein, score] for things that are coupled to this peg
426 :     =cut
427 :     sub coupled_to {
428 :     my $fig = new FIG;
429 :     my $return=undef;
430 :     my @result = $fig->coupled_to(@_);
431 :     if (@result)
432 :     {
433 :     $return = join("\n", map {$_->[0].",".$_->[1]} @result);
434 :     }
435 :     return $return;
436 :     }
437 :    
438 :     =begin WSDL
439 :     _IN peg $string
440 :     _RETURN $string
441 :     _DOC Get the pegs that may be coupled to this peg through abstract coupling. Input is a peg, output is list of [protein, score] for things that are coupled to this peg
442 :     =cut
443 :     sub abstract_coupled_to {
444 :     my $fig = new FIG;
445 :     my $return=undef;
446 :     my @result = $fig->abstract_coupled_to(@_);
447 :     if (@result)
448 :     {
449 :     $return = join("\n", map {$_->[0].",".$_->[1]} @result);
450 :     }
451 :     return $return;
452 :     }
453 :    
454 :     =begin WSDL
455 :     _IN peg_id $string
456 :     _IN n_pch_pins $string
457 :     _IN n_sims $string
458 :     _IN sim_cutoff $string
459 :     _IN color_sim_cutoff $string
460 :     _IN sort_by $string
461 :     _RETURN $string
462 :     _DOC Input is a FIG (PEG) ID and ..., output is the pinned regions data
463 :     =cut
464 :     sub pinned_region_data {
465 :     my ($peg, $n_pch_pins, $n_sims, $sim_cutoff, $color_sim_cutoff, $sort_by, $fast_color, $sims_from, $region_size) = @_;
466 :    
467 :     my $fig = new FIG;
468 :    
469 :     defined($n_pch_pins) or $n_pch_pins = 5;
470 :     defined($n_sims) or $n_sims = 0;
471 :     defined($sim_cutoff) or $sim_cutoff = 1e-20;
472 :     defined($color_sim_cutoff) or $color_sim_cutoff = 1e-20;
473 :     defined($sort_by) or $sort_by = '';
474 :    
475 :     defined($fast_color) or $fast_color = 0;
476 :     defined($sims_from) or $sims_from = 'blast';
477 :     defined($region_size) or $region_size = 16000;
478 :    
479 :     my $pin_desc = {
480 :     'pegs' => [$peg],
481 :     'collapse_close_genomes' => 0,
482 :     'n_pch_pins' => $n_pch_pins,
483 :     'n_sims' => $n_sims,
484 :     'show_genomes' => '',
485 :     'sim_cutoff' => $sim_cutoff,
486 :     'color_sim_cutoff' => $color_sim_cutoff,
487 :     'sort_by' => $sort_by,
488 :     'show_genomes' => [],
489 :     };
490 :    
491 :     my $maps = &PinnedRegions::pinned_regions($fig, $pin_desc, $fast_color, $sims_from, $region_size);
492 :     my $txt = Dumper($maps);
493 :     return $txt;
494 :     }
495 :    
496 :    
497 :     =begin WSDL
498 :     _IN complete $string
499 :     _IN restrictions $string
500 :     _IN domain $string
501 :     _RETURN @string
502 :     _DOC Get a set of genomes. The inputs are a series of constraints - whether the sequence is complete, other restrictions, and a domain of life (Bacteria, Archaea, Eukarya, Viral, Environmental Genome). Output is a comma separated list of genomes
503 :     =cut
504 :     sub all_genomes {
505 :     my $fig=new FIG;
506 :     my @genomes=$fig->genomes(@_);
507 :     return \@genomes;
508 :     }
509 :    
510 :     =begin WSDL
511 :     _IN complete $string
512 :     _IN restrictions $string
513 :     _IN domain $string
514 :     _RETURN @string
515 :     _DOC Get a set of genomes. The inputs are a series of constraints - whether the sequence is complete, other restrictions, and a domain of life (Bacteria, Archaea, Eukarya, Viral, Environmental Genome). Output is a comma separated list of genomes.
516 :     =cut
517 :     sub genomes {
518 :     my $fig = new FIG;
519 :     my @result = $fig->genomes(@_);
520 :     my %genomes = map {($_ => $fig->genus_species($_))} $fig->genomes(@_);
521 :    
522 :     return \%genomes;
523 :     }
524 :    
525 :    
526 :     =begin WSDL
527 :     _IN pattern1 $string
528 :     _IN pattern2 $string
529 :     _RETURN @string
530 :     _DOC Search and grep through the database. Input is two patterns, first one is used in search_index, second used to grep the results to restrict to a smaller set.
531 :     =cut
532 :     sub search_and_grep {
533 :     my ($arg1, $arg2) = @_;
534 :    
535 :     my $fig = new FIG;
536 :    
537 :     my ($pegs, $roles) = $fig->search_index($arg1);
538 :    
539 :     my (@result_list, $entry);
540 :    
541 :     for $entry (@$pegs) {
542 :     push (@result_list, grep(/$arg2/, @$entry));
543 :     }
544 :     push (@result_list, grep(/$arg2/, @$roles));
545 :     chomp @result_list;
546 :     my $return_value = join ("\n", @result_list);
547 :     return $return_value;
548 :     }
549 :    
550 :    
551 :    
552 :     sub search_genome {
553 :     my ($genome, $term)=@_;
554 :     my $fig = new FIG;
555 :     my ($pegs, $roles) = $fig->search_index($term);
556 :     my $result;
557 :     # For each feature, there is a tuple consisting of the (0) feature ID, (1) the organism name (genus and species), (2) the aliases, (3) the functional role, and (4) the relevant annotator
558 :    
559 :     foreach my $ent (grep {$_->[0] =~ /fig\|$genome\./} @$pegs) {
560 :     $result->{$ent->[0]}=$ent->[3];
561 :     }
562 :     return $result;
563 :     }
564 :    
565 :     =begin WSDL
566 :     _IN pattern $string
567 :     _RETURN @string
568 :     _DOC Search the database. Input is a pattern to search for, output is tab separated list of pegs and roles
569 :     =cut
570 :     sub simple_search {
571 :     my ($arg1)=@_;
572 :    
573 :     my $fig = new FIG;
574 :    
575 :     my ($pegs, $roles) = $fig->search_index($arg1);
576 :    
577 :     my (@result_list, $entry);
578 :    
579 :     for $entry (@$pegs) {
580 :     push (@result_list, (join("\t", @$entry)));
581 :     }
582 :    
583 :     # push (@result_list, (join("\t", @$roles)));
584 :     chomp @result_list;
585 :     my $return_value = join ("\n", @result_list);
586 :     return $return_value;
587 :     }
588 :    
589 :    
590 :     =begin all_subsystem_classifications
591 :    
592 :     Get all the subsystems classifications. Tested with JSON.
593 :    
594 :     =cut
595 :     sub all_subsystem_classifications {
596 :     my $fig = new FIG;
597 :     my $output;
598 :    
599 :     my %found;
600 :     map {
601 :     my @classification=@{$fig->subsystem_classification($_)};
602 :     $#classification=1;
603 :     push @classification, $_;
604 :     push @$output, \@classification;
605 :     } sort {$a cmp $b} ($fig->all_subsystems());
606 :     return $output;
607 :     }
608 :    
609 :     =begin all_subsystems_with_roles
610 :    
611 :     Get all the subsystems and their roles. Tested with JSON
612 :    
613 :     =cut
614 :     sub all_subsystems_with_roles {
615 :     my $fig = new FIG;
616 :     my $output = $fig->all_subsystems_with_roles();
617 :     return $output;
618 :     }
619 :    
620 :    
621 :     =begin functions_to_subsystems
622 :    
623 :     Get the subsystems for several functions. Tested with JSON
624 :    
625 :     =cut
626 :     sub functions_to_subsystems {
627 :     my $fig=new FIG;
628 :     my %fns;
629 :     foreach my $fn (@_) {
630 :     $fn = uri_unescape($fn);
631 :     my @arr = $fig->function_to_subsystems($fn);
632 :     $fns{$fn} = \@arr;
633 :     }
634 :     return \%fns;
635 :     }
636 :    
637 :     =begin function_to_subsystems
638 :    
639 :     Get the subsystems for a function. Tested with JSON
640 :    
641 :     =cut
642 :     sub function_to_subsystems {
643 :     my $fn = shift;
644 : redwards 1.3 #$fn =~ s/\%20/ /g;
645 :     $fn = uri_unescape($fn);
646 : redwards 1.1 my $fig=new FIG;
647 :     my @arr = $fig->function_to_subsystems($fn);
648 :     return \@arr;
649 :     }
650 :    
651 :    
652 :     =begin WSDL
653 :     _RETURN $string list of families
654 :     _DOC Get all the FIG protein families (FIGfams). No input needed, it just returns a list of all families
655 :     =cut
656 :     sub all_families {
657 :     my $fig = new FIG;
658 : redwards 1.2 my $figfams = new FFs($fig);
659 : redwards 1.1
660 :     my @out = $figfams->all_families;
661 :     print STDERR Dumper(@out);
662 :     return @out;
663 :     }
664 :    
665 :     =begin WSDL
666 :     _RETURN $string list of families and funcs
667 :     _DOC Get all the FIG protein families (FIGfams) with their assigned functions. No input needed, it just returns a list of all the families and their functions.
668 :     =cut
669 :     sub all_families_with_funcs {
670 :     my $fig = new FIG;
671 : redwards 1.2 my $figfams = new FFs($fig);
672 : redwards 1.1
673 :     my @out =$figfams->all_families_with_funcs;
674 :     return @out;
675 :     }
676 :    
677 :     =begin WSDL
678 :     _IN families $string list of famids
679 :     _RETURN $string 2 col table, famid, peg
680 :     _DOC Get all the pegs in some FIGfams. The input is a tab-separated list of family IDs, and the output is a two column table of [family id, peg]
681 :     =cut
682 :     sub list_members {
683 :     my ($famids) = @_;
684 :     my $fig = new FIG;
685 : redwards 1.2 my $figfams = new FFs($fig);
686 : redwards 1.1 my @in = split(/\t/, $famids);
687 :     warn("Starting 2 list members $famids\n");
688 :     my @out = ();
689 :     foreach my $famid (@in)
690 :     {
691 : redwards 1.2 my $famO = new FF($fig,$famid);
692 : redwards 1.1 foreach my $peg ($famO->list_members)
693 :     {
694 :     push(@out,[$famid,$peg]);
695 :     }
696 :     }
697 :     return @out;
698 :     }
699 :    
700 :     =begin WSDL
701 :     _IN families $string list of pegs
702 :     _RETURN $string returns a 3-column table [PEG,Function,AliasesCommaSeparated]
703 :     _DOC Get all the pegs in some FIGfams, their functions, and aliases. Input is a tab-separated list of families, returns a 3-column comma separated table [PEG, Function, Aliases]
704 :     =cut
705 :     sub CDS_data {
706 :     my ($pegs) = @_;
707 :     my $fig = new FIG;
708 : redwards 1.2 my $figfams = new FFs($fig);
709 : redwards 1.1 my @in = split(/\t/, $pegs);
710 :    
711 :     #warn("Starting CDS data $pegs\n");
712 :     #print STDERR &Dumper($pegs);
713 :    
714 :     my @out = ();
715 :     foreach my $peg (@in)
716 :     {
717 :     my @famids = $figfams->families_containing_peg($peg);
718 :     foreach my $famid (@famids)
719 :     {
720 :     push(@out,[$peg,scalar $fig->function_of($peg),[$fig->feature_aliases($peg)]]);
721 :     }
722 :     }
723 :     return @out;
724 :     }
725 :    
726 :     =begin WSDL
727 :     _IN families $string list of pegs
728 :     _RETURN $string a 2-column table [PEG,Sequence]
729 :     _DOC Get the protein sequences for a list of FIGfams. Input is a tab-separated list of families, returns a 2-column comma separated table of [PEG, sequence]
730 :     =cut
731 :     sub CDS_sequences {
732 :     my ($pegs) = @_;
733 :     my $fig = new FIG;
734 : redwards 1.2 my $figfams = new FFs($fig);
735 : redwards 1.1 my @in = split(/\t/, $pegs);
736 :    
737 :     #warn("Starting CDS seq $pegs\n");
738 :     #print STDERR &Dumper($pegs);
739 :     my @out = ();
740 :     foreach my $peg (@in)
741 :     {
742 :     push(@out,[$peg,$fig->get_translation($peg)]);
743 :     }
744 :     return @out;
745 :     }
746 :    
747 :     =begin WSDL
748 :     _IN sequences $string list of id seq pairs
749 :     _RETURN $string returns a 2-column table [Id,FamilyID]
750 :     _DOC Tries to put a protein sequence in a family. Input is a tab-separated id and sequence, delimited by new lines. The output is a comma-separated 2-column table [your sequence id, FamilyID] if the sequence is placed in a family.
751 :     =cut
752 :     sub is_member_of {
753 :     my ($id_seqs) = @_;
754 :     my $fig = new FIG;
755 : redwards 1.2 my $figfams = new FFs($fig);
756 : redwards 1.1 #warn("Doing is member $id_seqs\n");
757 :     #print STDERR &Dumper($id_seqs);
758 :    
759 :     my @in = split(/\n/, $id_seqs);
760 :     my @out = ();
761 :     foreach my $pair (@in)
762 :     {
763 :     my($id,$seq) = split(/\t/, $pair);
764 :     my($famO,undef) = $figfams->place_in_family($seq);
765 :     if ($famO)
766 :     {
767 :     push(@out,[$id,$famO->family_id]);
768 :     }
769 :     }
770 :     return @out;
771 :     }
772 :    
773 :     =begin WSDL
774 :     _IN peg $string
775 :     _IN maxN $string
776 :     _IN maxP $string
777 :     _RETURN $string
778 :     _DOC Retrieve the sims (precomputed BLAST hits) for a given protein sequence. Input is a peg, an optional maximum number of hits (default=50), and an optional maximum E value (default=1e-5). The output is a list of sims in modified tab separated (-m 8) format. Additional columns include length of query and database sequences, and method used.
779 :     =cut
780 :     sub sims {
781 :     my ($peg, $maxN, $maxP)=@_;
782 :     unless (defined $maxN) {$maxN=50}
783 :     unless (defined $maxP) {$maxP=1e-5}
784 :     my $fig=new FIG;
785 :     my $return=undef;
786 :     foreach my $sim ($fig->sims($peg, $maxN, $maxP, 'figx'))
787 :     {
788 :     $return .= join("\t", @$sim). "\n";
789 :     }
790 :    
791 :     return $return;
792 :     }
793 :    
794 :    
795 :    
796 :     ##### INTERNAL METHODS
797 :    
798 :     sub stdin_caller {
799 :     my ($name, $arg) = @_;
800 :     my($rd, $wr, $err, $pid, $std_err, $return_value, @std_out);
801 :     if (!($pid = open3($wr, $rd, $err, "$FIG_Config::bin/$name")))
802 :     {
803 :     die "Cannot run open3 $name: $!";
804 :     }
805 :    
806 :     $wr->write($arg);
807 :     close($wr);
808 :    
809 :     @std_out= <$rd>;
810 :     close($rd);
811 :     waitpid $pid, 0;
812 :     $return_value = join ("", @std_out);
813 :     return $return_value;
814 :     }
815 :    
816 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3