[Bio] / FigWebServices / webservices_seed.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/webservices_seed.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download)

1 : redwards 1.1 #__perl__
2 :    
3 :     use strict;
4 :     use Carp;
5 :     use SOAP::Lite;
6 :     use SOAP::Transport::HTTP;
7 :     use Data::Dumper;
8 :    
9 :     SOAP::Transport::HTTP::CGI
10 :     -> dispatch_to('SeedWebServices')
11 :     -> handle;
12 :    
13 :     package SeedWebServices;
14 :    
15 :     use FIG;
16 :     use FIG_Config;
17 :     use Data::Dumper;
18 :     use FigFams;
19 :     use FigFam;
20 :     use IPC::Open3;
21 :     use PinnedRegions;
22 :    
23 :    
24 :    
25 :     =begin WSDL
26 :     _IN alias $string
27 :     _RETURN $string
28 :     _DOC Retrieve the protein sequence for a given identifier. Input is an alias, output is a sequence
29 :     =cut
30 :     sub ali_to_seq {
31 :     my ($class, $arg) = @_;
32 :     return stdin_caller($class, "ali_to_seq", $arg);
33 :     }
34 :    
35 :     =begin WSDL
36 :     _IN pegs $string
37 :     _RETURN $string
38 :     _DOC Retrieve the set of pegs in order along the chromosome. Input is a comma separated list of pegs, and output is the pegs in order along the genome.
39 :     =cut
40 :     sub adjacent {
41 :     my ($class, $arg) = @_;
42 :     $arg =~ s/\,\s*/\n/g;
43 :     return stdin_caller($class, "adjacent", $arg);
44 :     }
45 :    
46 :     =begin WSDL
47 :     _IN peg $string
48 :     _RETURN $string
49 :     _DOC Get the clusters for a peg by bidirectional best hits. Input is a peg, output is two column table of [peg, cluster]
50 :     =cut
51 :     sub cluster_by_bbhs {
52 :     my ($class, $arg) = @_;
53 :     return stdin_caller($class, "cluster_by_bbhs", $arg);
54 :     }
55 :    
56 :     =begin WSDL
57 :     _IN peg $string
58 :     _RETURN $string
59 :     _DOC Get the clusters for a peg by similarity. Input is a peg, output is two column table of [peg, cluster]
60 :     =cut
61 :     sub cluster_by_sim {
62 :     my ($class, $arg) = @_;
63 :     return stdin_caller($class, "cluster_by_sim", $arg);
64 :     }
65 :    
66 :     =begin WSDL
67 :     _IN peg $string
68 :     _RETURN $string
69 :     _DOC Get the annotations for a peg from all other known sources. Input is a peg, output is two column table of [peg, other function]
70 :     =cut
71 :     sub external_calls {
72 :     my ($class, $arg) = @_;
73 :     return stdin_caller($class, "external_calls", $arg);
74 :     }
75 :    
76 :     =begin WSDL
77 :     _IN peg $string
78 :     _RETURN $string
79 :     _DOC Get the functional annotation of a given protein identifier. Input is a peg, output is a function
80 :     =cut
81 :     sub function_of {
82 :     my ($class, $arg) = @_;
83 :     my $fig=new FIG;
84 :     return scalar($fig->function_of($arg));
85 :     }
86 :    
87 :     =begin WSDL
88 :     _IN peg $string
89 :     _RETURN $string
90 :     _DOC Get the genome(s) that a given protein identifier refers to. Input is a peg, output is a single column table of genomes
91 :     =cut
92 :     sub genomes_of {
93 :     my ($class, $arg) = @_;
94 :     return stdin_caller($class, "genomes_of", $arg);
95 :     }
96 :    
97 :    
98 :     =begin WSDL
99 :     _IN genomeid $string
100 :     _RETURN $string
101 :     _DOC Get the genus and species of a genome identifier. Input is a genome ID, output is the genus and species of the genome
102 :     =cut
103 :     sub genus_species {
104 :     my ($class, $arg) = @_;
105 :     my $fig=new FIG;
106 :     return $fig->genus_species($arg);
107 :     }
108 :    
109 :    
110 :     =begin WSDL
111 :     _IN peg $string
112 :     _RETURN $string
113 :     _DOC Get the DNA sequence for a given protein identifier. Input is a peg, output is the DNA sequence in fasta format.
114 :     =cut
115 :     sub fid2dna {
116 :     my ($class, $arg) = @_;
117 :     return stdin_caller($class, "fid2dna", $arg);
118 :     }
119 :    
120 :    
121 :     =begin WSDL
122 :     _IN genomeid $string
123 :     _IN location1 $string
124 :     _IN location2 $string
125 :     _RETURN @string
126 :     _DOC Get the DNA sequence for a region in a genome. Input is a genome ID and one or more locations in the form contig_start_stop, output is the DNA sequence in fasta format.
127 :     =cut
128 :     sub dna_sequence {
129 :     my ($class, $genome, @locations)=@_;
130 :     my $fig = new FIG;
131 :     my $seq=$fig->dna_seq($genome,@locations);
132 :     return $seq;
133 :     }
134 :    
135 :    
136 :    
137 :     =begin WSDL
138 :     _IN genomeid $string
139 :     _RETURN $string
140 :     _DOC Get all the protein identifiers associated with a genome. Input is a genome id, output is a list of pegs in that genome
141 :     =cut
142 :     sub pegs_of {
143 :     my ($class, $arg) = @_;
144 :     my $fig = new FIG;
145 :     return (join ",", $fig->pegs_of($arg));
146 :     }
147 :    
148 :     =begin WSDL
149 :     _IN genomeid $string
150 :     _IN contig $string
151 :     _RETURN $string
152 :     _DOC Get the length of the DNA sequence in a contig in a genome. Input is a genome id and a contig name, return is the length of the contig
153 :     =cut
154 :     sub contig_ln {
155 :     my $class = shift();
156 :     my $fig = new FIG;
157 :     return $fig->contig_ln(@_);
158 :     }
159 :    
160 :     =begin WSDL
161 :     _IN genomeid $string
162 :     _RETURN $string
163 :     _DOC Test whether an organism is Archaeal. Input is a genome identifier, and output is true or false (or 1 or 0)
164 :     =cut
165 :     sub is_archaeal {
166 :     my ($class, $arg) = @_;
167 :     return stdin_caller($class, "is_archaeal", $arg);
168 :     }
169 :    
170 :     =begin WSDL
171 :     _IN genomeid $string
172 :     _RETURN $string
173 :     _DOC Test whether an organism is Bacterial. Input is a genome identifier, and output is true or false (or 1 or 0)
174 :     =cut
175 :     sub is_bacterial {
176 :     my ($class, $arg) = @_;
177 :     return stdin_caller($class, "is_bacterial", $arg);
178 :     }
179 :    
180 :     =begin WSDL
181 :     _IN genomeid $string
182 :     _RETURN $string
183 :     _DOC Test whether an organism is Eukaryotic. Input is a genome identifier, and output is true or false (or 1 or 0)
184 :     =cut
185 :     sub is_eukaryotic {
186 :     my ($class, $arg) = @_;
187 :     return stdin_caller($class, "is_eukaryotic", $arg);
188 :     }
189 :    
190 :     =begin WSDL
191 :     _IN genomeid $string
192 :     _RETURN $string
193 :     _DOC Test whether an organism is a Prokaryote. Input is a genome identifier, and output is true or false (or 1 or 0)
194 :     =cut
195 :     sub is_prokaryotic {
196 :     my ($class, $arg) = @_;
197 :     return stdin_caller($class, "is_prokaryotic", $arg);
198 :     }
199 :    
200 :     =begin WSDL
201 :     _IN peg $string
202 :     _RETURN $string
203 :     _DOC Get the translation (protein sequence) of a peg. Input is a peg, output is the protein sequence
204 :     =cut
205 :     sub translation_of {
206 :     my ($class, $arg) = @_;
207 :     return stdin_caller($class, "translation_of", $arg);
208 :     }
209 :    
210 :     =begin WSDL
211 :     _IN peg $string
212 :     _RETURN $string
213 :     _DOC Get the translation (protein sequence) of a peg. Input is a peg, output is translation
214 :     =cut
215 :     sub get_translation {
216 :     my ($class, $arg1) = @_;
217 :     my $fig = new FIG;
218 :     my $result = $fig->get_translation($arg1);
219 :     return $result;
220 :     }
221 :    
222 :     =begin WSDL
223 :     _IN peg $string
224 :     _RETURN @string
225 :     _DOC Get the location of a peg on its contig. Input is a peg, output is list of loc on contig
226 :     =cut
227 :     sub feature_location {
228 :     my ($class, $arg1) = @_;
229 :     my $fig = new FIG;
230 :     my @result = ($fig->feature_location($arg1));
231 :     return @result;
232 :     }
233 :    
234 :     =begin WSDL
235 :     _IN peg $string
236 :     _RETURN $string
237 :     _DOC Get the aliases of a peg. These are the identifiers that other databases use. Input is a peg, output is tab separated string of aliases
238 :     =cut
239 :     sub aliases_of {
240 :     my ($class, $arg) = @_;
241 :     return stdin_caller($class, "aliases_of", $arg);
242 :     }
243 :    
244 :     =begin WSDL
245 :     _IN alias $string
246 :     _RETURN $string
247 :     _DOC Get the FIG ID (peg) for a given external identifier. Input is an identifier used by another database, output is our identifier
248 :     =cut
249 :     sub alias2fig {
250 :     my ($class, $arg) = @_;
251 :     return stdin_caller($class, "alias2fig", $arg);
252 :     }
253 :    
254 :    
255 :    
256 :     =begin WSDL
257 :     _IN EC_number $string ec code
258 :     _RETURN $string ec name
259 :     _DOC Get the name for a given E.C. number. Input is an EC number, output is the name
260 :     =cut
261 :     sub ec_name {
262 :     my $class = shift();
263 :     my $fig = new FIG;
264 :     my $result = $fig->ec_name(@_);
265 :     return $result;
266 :     }
267 :    
268 :     =begin WSDL
269 :     _IN peg $string
270 :     _RETURN $string
271 :     _DOC Get the pegs that are coupled to any given peg. Input is a peg, output is list of [protein, score] for things that are coupled to this peg
272 :     =cut
273 :     sub coupled_to {
274 :     my $class = shift();
275 :     my $fig = new FIG;
276 :     my $return=undef;
277 :     my @result = $fig->coupled_to(@_);
278 :     if (@result)
279 :     {
280 :     $return = join("\n", map {$_->[0].",".$_->[1]} @result);
281 :     }
282 :     return $return;
283 :     }
284 :    
285 :     =begin WSDL
286 :     _IN peg $string
287 :     _RETURN $string
288 :     _DOC Get the pegs that may be coupled to this peg through abstract coupling. Input is a peg, output is list of [protein, score] for things that are coupled to this peg
289 :     =cut
290 :     sub abstract_coupled_to {
291 :     my $class = shift();
292 :     my $fig = new FIG;
293 :     my $return=undef;
294 :     my @result = $fig->abstract_coupled_to(@_);
295 :     if (@result)
296 :     {
297 :     $return = join("\n", map {$_->[0].",".$_->[1]} @result);
298 :     }
299 :     return $return;
300 :     }
301 :    
302 :     =begin WSDL
303 :     _IN peg_id $string
304 :     _IN n_pch_pins $string
305 :     _IN n_sims $string
306 :     _IN sim_cutoff $string
307 :     _IN color_sim_cutoff $string
308 :     _IN sort_by $string
309 :     _RETURN $string
310 :     _DOC Input is a FIG (PEG) ID and ..., output is the pinned regions data
311 :     =cut
312 :     sub pinned_region_data {
313 :     my ($class, $peg, $n_pch_pins, $n_sims, $sim_cutoff, $color_sim_cutoff, $sort_by, $fast_color, $sims_from, $region_size) = @_;
314 :    
315 :     my $fig = new FIG;
316 :    
317 :     defined($n_pch_pins) or $n_pch_pins = 5;
318 :     defined($n_sims) or $n_sims = 0;
319 :     defined($sim_cutoff) or $sim_cutoff = 1e-20;
320 :     defined($color_sim_cutoff) or $color_sim_cutoff = 1e-20;
321 :     defined($sort_by) or $sort_by = '';
322 :    
323 :     defined($fast_color) or $fast_color = 0;
324 :     defined($sims_from) or $sims_from = 'blast';
325 :     defined($region_size) or $region_size = 16000;
326 :    
327 :     my $pin_desc = {
328 :     'pegs' => [$peg],
329 :     'collapse_close_genomes' => 0,
330 :     'n_pch_pins' => $n_pch_pins,
331 :     'n_sims' => $n_sims,
332 :     'show_genomes' => '',
333 :     'sim_cutoff' => $sim_cutoff,
334 :     'color_sim_cutoff' => $color_sim_cutoff,
335 :     'sort_by' => $sort_by,
336 :     'show_genomes' => [],
337 :     };
338 :    
339 :     my $maps = &PinnedRegions::pinned_regions($fig, $pin_desc, $fast_color, $sims_from, $region_size);
340 :     my $txt = Dumper($maps);
341 :     return $txt;
342 :     }
343 :    
344 :    
345 :     =begin WSDL
346 :     _IN complete $string
347 :     _IN restrictions $string
348 :     _IN domain $string
349 :     _RETURN @string
350 :     _DOC Get a set of genomes. The inputs are a series of constraints - whether the sequence is complete, other restrictions, and a domain of life (Bacteria, Archaea, Eukarya, Viral, Environmental Genome). Output is a comma separated list of genomes
351 :     =cut
352 :     sub all_genomes {
353 :     my $class = shift();
354 :     my $fig=new FIG;
355 :     my @genomes=$fig->genomes(@_);
356 :     return join(",", @genomes);
357 :     }
358 :    
359 :     =begin WSDL
360 :     _IN complete $string
361 :     _IN restrictions $string
362 :     _IN domain $string
363 :     _RETURN @string
364 :     _DOC Get a set of genomes. The inputs are a series of constraints - whether the sequence is complete, other restrictions, and a domain of life (Bacteria, Archaea, Eukarya, Viral, Environmental Genome). Output is a comma separated list of genomes.
365 :     =cut
366 :     sub genomes {
367 :     my $class = shift();
368 :     my $fig = new FIG;
369 :     my @result = $fig->genomes(@_);
370 :     my @genomes;
371 :     foreach my $genome (@result)
372 :     {
373 :     print STDERR "Genome is -$genome-\n";
374 :    
375 :     my $genus_species = $fig->genus_species($genome);
376 :     push @genomes, join("\t",$genome,$genus_species);
377 :     # print STDERR join("\t",$genome,$genus_species);
378 :     }
379 :    
380 :     #print STDERR @genomes;
381 :     return @genomes;
382 :     #return @result;
383 :     }
384 :    
385 :     =begin WSDL
386 :     _IN pattern1 $string
387 :     _IN pattern2 $string
388 :     _RETURN @string
389 :     _DOC Search and grep through the database. Input is two patterns, first one is used in search_index, second used to grep the results to restrict to a smaller set.
390 :     =cut
391 :     sub search_and_grep {
392 :     my ($class, $arg1, $arg2) = @_;
393 :    
394 :     my $fig = new FIG;
395 :    
396 :     my ($pegs, $roles) = $fig->search_index($arg1);
397 :    
398 :     my (@result_list, $entry);
399 :    
400 :     for $entry (@$pegs) {
401 :     push (@result_list, grep(/$arg2/, @$entry));
402 :     }
403 :     push (@result_list, grep(/$arg2/, @$roles));
404 :     chomp @result_list;
405 :     my $return_value = join ("\n", @result_list);
406 :     return $return_value;
407 :     }
408 :    
409 :    
410 :     =begin WSDL
411 :     _IN pattern $string
412 :     _RETURN @string
413 :     _DOC Search the database. Input is a pattern to search for, output is tab separated list of pegs and roles
414 :     =cut
415 :     sub simple_search {
416 :     my ($class, $arg1)=@_;
417 :    
418 :     my $fig = new FIG;
419 :    
420 :     my ($pegs, $roles) = $fig->search_index($arg1);
421 :    
422 :     my (@result_list, $entry);
423 :    
424 :     for $entry (@$pegs) {
425 :     push (@result_list, (join("\t", @$entry)));
426 :     }
427 :    
428 :     # push (@result_list, (join("\t", @$roles)));
429 :     chomp @result_list;
430 :     my $return_value = join ("\n", @result_list);
431 :     return $return_value;
432 :     }
433 :    
434 :    
435 :     =begin WSDL
436 :     _RETURN $string list of subsystems and their classifications
437 :     _DOC Get a list of all the subsystems and their classifications. No input needed, it just returns a list of all the subsystems and their classifications
438 :     =cut
439 :     sub all_subsystem_classifications {
440 :     my ($class) = @_;
441 :     my $fig = new FIG;
442 :     my $output;
443 :    
444 :     my %found;
445 :     map {
446 :     my @classification=@{$fig->subsystem_classification($_)};
447 :     $#classification=1;
448 :     push @classification, $_;
449 :     $output.= join("\t", @classification)."\n";
450 :     } sort {$a cmp $b} ($fig->all_subsystems());
451 :     return $output;
452 :     }
453 :    
454 :    
455 :     =begin WSDL
456 :     _RETURN $string list of families
457 :     _DOC Get all the FIG protein families (FIGfams). No input needed, it just returns a list of all families
458 :     =cut
459 :     sub all_families {
460 :     my ($class) = @_;
461 :     my $fig = new FIG;
462 :     my $figfams = new FigFams($fig);
463 :    
464 :     my @out = $figfams->all_families;
465 :     print STDERR Dumper(@out);
466 :     return @out;
467 :     }
468 :    
469 :     =begin WSDL
470 :     _RETURN $string list of families and funcs
471 :     _DOC Get all the FIG protein families (FIGfams) with their assigned functions. No input needed, it just returns a list of all the families and their functions.
472 :     =cut
473 :     sub all_families_with_funcs {
474 :     my ($class) = @_;
475 :     my $fig = new FIG;
476 :     my $figfams = new FigFams($fig);
477 :    
478 :     my @out =$figfams->all_families_with_funcs;
479 :     return @out;
480 :     }
481 :    
482 :     =begin WSDL
483 :     _IN families $string list of famids
484 :     _RETURN $string 2 col table, famid, peg
485 :     _DOC Get all the pegs in some FIGfams. The input is a tab-separated list of family IDs, and the output is a two column table of [family id, peg]
486 :     =cut
487 :     sub list_members {
488 :     my ($class, $famids) = @_;
489 :     my $fig = new FIG;
490 :     my $figfams = new FigFams($fig);
491 :     my @in = split(/\t/, $famids);
492 :     warn("Starting 2 list members $famids\n");
493 :     my @out = ();
494 :     foreach my $famid (@in)
495 :     {
496 :     my $famO = new FigFam($fig,$famid);
497 :     foreach my $peg ($famO->list_members)
498 :     {
499 :     push(@out,[$famid,$peg]);
500 :     }
501 :     }
502 :     return @out;
503 :     }
504 :    
505 :     =begin WSDL
506 :     _IN families $string list of pegs
507 :     _RETURN $string returns a 3-column table [PEG,Function,AliasesCommaSeparated]
508 :     _DOC Get all the pegs in some FIGfams, their functions, and aliases. Input is a tab-separated list of families, returns a 3-column comma separated table [PEG, Function, Aliases]
509 :     =cut
510 :     sub CDS_data {
511 :     my ($class, $pegs) = @_;
512 :     my $fig = new FIG;
513 :     my $figfams = new FigFams($fig);
514 :     my @in = split(/\t/, $pegs);
515 :    
516 :     #warn("Starting CDS data $pegs\n");
517 :     #print STDERR &Dumper($pegs);
518 :    
519 :     my @out = ();
520 :     foreach my $peg (@in)
521 :     {
522 :     my @famids = $figfams->families_containing_peg($peg);
523 :     foreach my $famid (@famids)
524 :     {
525 :     push(@out,[$peg,scalar $fig->function_of($peg),[$fig->feature_aliases($peg)]]);
526 :     }
527 :     }
528 :     return @out;
529 :     }
530 :    
531 :     =begin WSDL
532 :     _IN families $string list of pegs
533 :     _RETURN $string a 2-column table [PEG,Sequence]
534 :     _DOC Get the protein sequences for a list of FIGfams. Input is a tab-separated list of families, returns a 2-column comma separated table of [PEG, sequence]
535 :     =cut
536 :     sub CDS_sequences {
537 :     my ($class, $pegs) = @_;
538 :     my $fig = new FIG;
539 :     my $figfams = new FigFams($fig);
540 :     my @in = split(/\t/, $pegs);
541 :    
542 :     #warn("Starting CDS seq $pegs\n");
543 :     #print STDERR &Dumper($pegs);
544 :     my @out = ();
545 :     foreach my $peg (@in)
546 :     {
547 :     push(@out,[$peg,$fig->get_translation($peg)]);
548 :     }
549 :     return @out;
550 :     }
551 :    
552 :     =begin WSDL
553 :     _IN sequences $string list of id seq pairs
554 :     _RETURN $string returns a 2-column table [Id,FamilyID]
555 :     _DOC Tries to put a protein sequence in a family. Input is a tab-separated id and sequence, delimited by new lines. The output is a comma-separated 2-column table [your sequence id, FamilyID] if the sequence is placed in a family.
556 :     =cut
557 :     sub is_member_of {
558 :     my ($class, $id_seqs) = @_;
559 :     my $fig = new FIG;
560 :     my $figfams = new FigFams($fig);
561 :     #warn("Doing is member $id_seqs\n");
562 :     #print STDERR &Dumper($id_seqs);
563 :    
564 :     my @in = split(/\n/, $id_seqs);
565 :     my @out = ();
566 :     foreach my $pair (@in)
567 :     {
568 :     my($id,$seq) = split(/\t/, $pair);
569 :     my($famO,undef) = $figfams->place_in_family($seq);
570 :     if ($famO)
571 :     {
572 :     push(@out,[$id,$famO->family_id]);
573 :     }
574 :     }
575 :     return @out;
576 :     }
577 :    
578 :     =begin WSDL
579 :     _IN peg $string
580 :     _IN maxN $string
581 :     _IN maxP $string
582 :     _RETURN $string
583 :     _DOC Retrieve the sims (precomputed BLAST hits) for a given protein sequence. Input is a peg, an optional maximum number of hits (default=50), and an optional maximum E value (default=1e-5). The output is a list of sims in modified tab separated (-m 8) format. Additional columns include length of query and database sequences, and method used.
584 :     =cut
585 :     sub sims {
586 :     my ($class, $peg, $maxN, $maxP)=@_;
587 :     unless (defined $maxN) {$maxN=50}
588 :     unless (defined $maxP) {$maxP=1e-5}
589 :     my $fig=new FIG;
590 :     my $return=undef;
591 :     foreach my $sim ($fig->sims($peg, $maxN, $maxP, 'figx'))
592 :     {
593 :     $return .= join("\t", @$sim). "\n";
594 :     }
595 :    
596 :     return $return;
597 :     }
598 :    
599 :    
600 :    
601 :     ##### INTERNAL METHODS
602 :    
603 :     sub stdin_caller {
604 :     my ($class, $name, $arg) = @_;
605 :     my($rd, $wr, $err, $pid, $std_err, $return_value, @std_out);
606 :     if (!($pid = open3($wr, $rd, $err, "$FIG_Config::bin/$name")))
607 :     {
608 :     die "Cannot run open3 $name: $!";
609 :     }
610 :    
611 :     $wr->write($arg);
612 :     close($wr);
613 :    
614 :     @std_out= <$rd>;
615 :     close($rd);
616 :     waitpid $pid, 0;
617 :     $return_value = join ("", @std_out);
618 :     return $return_value;
619 :     }
620 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3