Parent Directory
|
Revision Log
Revision 1.1 - (view) (download)
1 : | redwards | 1.1 | #__perl__ |
2 : | |||
3 : | # NOTE THAT use strict will break this!! | ||
4 : | use lib '/home/redwards/perl/lib/perl5/site_perl/5.8.7/i686-linux/'; | ||
5 : | use CGI; | ||
6 : | use CGI::Carp qw/fatalsToBrowser/; | ||
7 : | use JSON::XS; | ||
8 : | |||
9 : | use FIG; | ||
10 : | use FIG_Config; | ||
11 : | use Data::Dumper; | ||
12 : | use FigFams; | ||
13 : | use FigFam; | ||
14 : | use IPC::Open3; | ||
15 : | use PinnedRegions; | ||
16 : | use URI::Escape; | ||
17 : | |||
18 : | |||
19 : | =pod | ||
20 : | |||
21 : | =head1 rest_seed.cgi | ||
22 : | |||
23 : | YAWS - Yet another web service! | ||
24 : | |||
25 : | Why: We're using rpc encoding which is basically URL encoding. In this, I call something like | ||
26 : | |||
27 : | http://bioseed.mcs.anl.gov/~redwards/FIG/rest_seed.cgi/multiply/2/3/4/5 | ||
28 : | |||
29 : | and get a response. Why do we need another web service? Mainly because of the Google work. Google pretty much exclusively deals with http requests, and eschews SOAP and other encodings as being too complex. | ||
30 : | |||
31 : | The data returned is all in JSON format (http://www.json.org/) which is the Javascript object notation format. JSON is a really light weight markup language that cna handle complex objects quite easily. | ||
32 : | |||
33 : | I am also aiming for lightweight code. In this case, we're not going to instantiate anything until we need it. Hopefully. | ||
34 : | |||
35 : | NOTE: I can't figure out how to encode forward slashes in URLs. We cant escape them with two slashes (\\) since the browser concatenates them, and we cant encode them with url_encode cos the server craps out. For now I am using the regexp forwardslash (case insensitive) | ||
36 : | |||
37 : | =cut | ||
38 : | |||
39 : | # a really simple RESTful web service that returns seed data | ||
40 : | |||
41 : | my $cgi=new CGI qw/:standard/; | ||
42 : | my $json= new JSON::XS; | ||
43 : | |||
44 : | print $cgi->header('text/plain'); | ||
45 : | |||
46 : | # get the query with path so we get the RESTful information | ||
47 : | my $abs = $cgi->url(-absolute=>1); # this will be the cgi-bin/rest.cgi stuff | ||
48 : | my $rest = $cgi->url(-path_info=>1); | ||
49 : | $rest =~ s/^.*$abs\///; | ||
50 : | |||
51 : | # we need to escape double slashes so that we don't split on them | ||
52 : | |||
53 : | my @rest=split m#/#, $rest; | ||
54 : | |||
55 : | #my @rest=split m#(?<!/)/(?!/)#, $rest; # this matches a '/' that is neither preceeded nor followed by a '/', so '//' does not match | ||
56 : | |||
57 : | map {$rest[$_] =~ s#forwardslash#/#gi} (0 .. $#rest); | ||
58 : | |||
59 : | my $method = shift @rest; | ||
60 : | |||
61 : | # there is no good way of passing a null value!! | ||
62 : | map {undef $rest[$_] if ($rest[$_] eq "undef" || $rest[$_] eq "null")} (0..$#rest); | ||
63 : | |||
64 : | #my $result = $json->encode({url=>$rest, query=>\@rest, result => &{$method}(@rest) }); | ||
65 : | my $result = $json->encode({result => &{$method}(@rest) }); | ||
66 : | |||
67 : | print $result, "\n"; | ||
68 : | |||
69 : | |||
70 : | |||
71 : | =pod | ||
72 : | |||
73 : | =head1 multiply. | ||
74 : | |||
75 : | This is a really simple method that just multiplies two numbers! It's great for testing things out | ||
76 : | |||
77 : | =cut | ||
78 : | |||
79 : | sub multiply { | ||
80 : | |||
81 : | my $x = 1; | ||
82 : | map {$x = $x * $_} @_; | ||
83 : | return $x; | ||
84 : | } | ||
85 : | |||
86 : | =head1 returnNumbers | ||
87 : | |||
88 : | And this method returns a reference to an array, so you can test JSON parsing | ||
89 : | |||
90 : | =cut | ||
91 : | |||
92 : | sub returnNumbers { | ||
93 : | return [21, 23]; | ||
94 : | } | ||
95 : | |||
96 : | |||
97 : | |||
98 : | |||
99 : | =begin WSDL | ||
100 : | _IN alias $string | ||
101 : | _RETURN $string | ||
102 : | _DOC Retrieve the protein sequence for a given identifier. Input is an alias, output is a sequence | ||
103 : | =cut | ||
104 : | sub ali_to_seq { | ||
105 : | my ($arg) = @_; | ||
106 : | return stdin_caller("ali_to_seq", $arg); | ||
107 : | } | ||
108 : | |||
109 : | =begin WSDL | ||
110 : | _IN pegs $string | ||
111 : | _RETURN $string | ||
112 : | _DOC Retrieve the set of pegs in order along the chromosome. Input is a comma separated list of pegs, and output is the pegs in order along the genome. | ||
113 : | =cut | ||
114 : | sub adjacent { | ||
115 : | my ($arg) = @_; | ||
116 : | $arg =~ s/\,\s*/\n/g; | ||
117 : | return stdin_caller("adjacent", $arg); | ||
118 : | } | ||
119 : | |||
120 : | =begin WSDL | ||
121 : | _IN peg $string | ||
122 : | _RETURN $string | ||
123 : | _DOC Get the clusters for a peg by bidirectional best hits. Input is a peg, output is two column table of [peg, cluster] | ||
124 : | =cut | ||
125 : | sub cluster_by_bbhs { | ||
126 : | my ($arg) = @_; | ||
127 : | return stdin_caller("cluster_by_bbhs", $arg); | ||
128 : | } | ||
129 : | |||
130 : | =begin WSDL | ||
131 : | _IN peg $string | ||
132 : | _RETURN $string | ||
133 : | _DOC Get the clusters for a peg by similarity. Input is a peg, output is two column table of [peg, cluster] | ||
134 : | =cut | ||
135 : | sub cluster_by_sim { | ||
136 : | my ($arg) = @_; | ||
137 : | return stdin_caller("cluster_by_sim", $arg); | ||
138 : | } | ||
139 : | |||
140 : | =begin WSDL | ||
141 : | _IN peg $string | ||
142 : | _RETURN $string | ||
143 : | _DOC Get the annotations for a peg from all other known sources. Input is a peg, output is two column table of [peg, other function] | ||
144 : | =cut | ||
145 : | sub external_calls { | ||
146 : | my ($arg) = @_; | ||
147 : | return stdin_caller("external_calls", $arg); | ||
148 : | } | ||
149 : | |||
150 : | =begin WSDL | ||
151 : | _IN peg $string | ||
152 : | _RETURN $string | ||
153 : | _DOC Get the functional annotation of a given protein identifier. Input is a peg, output is a function | ||
154 : | =cut | ||
155 : | sub function_of { | ||
156 : | my ($arg) = @_; | ||
157 : | my $fig=new FIG; | ||
158 : | return scalar($fig->function_of($arg)); | ||
159 : | } | ||
160 : | |||
161 : | =begin WSDL | ||
162 : | _IN peg $string | ||
163 : | _RETURN $string | ||
164 : | _DOC Get the genome(s) that a given protein identifier refers to. Input is a peg, output is a single column table of genomes | ||
165 : | =cut | ||
166 : | sub genomes_of { | ||
167 : | my ($arg) = @_; | ||
168 : | return stdin_caller("genomes_of", $arg); | ||
169 : | } | ||
170 : | |||
171 : | |||
172 : | =begin WSDL | ||
173 : | _IN genomeid $string | ||
174 : | _RETURN $string | ||
175 : | _DOC Get the genus and species of a genome identifier. Input is a genome ID, output is the genus and species of the genome | ||
176 : | =cut | ||
177 : | sub genus_species { | ||
178 : | my ($arg) = @_; | ||
179 : | my $fig=new FIG; | ||
180 : | return $fig->genus_species($arg); | ||
181 : | } | ||
182 : | |||
183 : | |||
184 : | =begin WSDL | ||
185 : | _IN peg $string | ||
186 : | _RETURN $string | ||
187 : | _DOC Get the DNA sequence for a given protein identifier. Input is a peg, output is the DNA sequence in fasta format. | ||
188 : | =cut | ||
189 : | sub fid2dna { | ||
190 : | my ($arg) = @_; | ||
191 : | return stdin_caller("fid2dna", $arg); | ||
192 : | } | ||
193 : | |||
194 : | =begin WSDL | ||
195 : | _IN peg $string | ||
196 : | _RETURN $string | ||
197 : | _DOC Get the DNA sequence for a set of protein identifiers. Input is a comma-joined list of pegs, output is the DNA sequence in fasta format. | ||
198 : | =cut | ||
199 : | sub fids2dna { | ||
200 : | my ($arg) = @_; | ||
201 : | my $seq; | ||
202 : | foreach my $peg (split /\,/, $arg) { | ||
203 : | $seq .= stdin_caller("fid2dna", $peg); | ||
204 : | } | ||
205 : | return $seq; | ||
206 : | } | ||
207 : | |||
208 : | |||
209 : | =begin WSDL | ||
210 : | _IN genomeid $string | ||
211 : | _RETURN @string | ||
212 : | _DOC Get a comma-separated list of all the contigs in a genome | ||
213 : | =cut | ||
214 : | sub contigs_of { | ||
215 : | my ($genome)=@_; | ||
216 : | my $fig = new FIG; | ||
217 : | return join(",", $fig->contigs_of($genome)); | ||
218 : | } | ||
219 : | |||
220 : | |||
221 : | =begin WSDL | ||
222 : | _IN genomeid $string | ||
223 : | _IN location1 $string | ||
224 : | _IN location2 $string | ||
225 : | _RETURN @string | ||
226 : | _DOC Get the DNA sequence for a region in a genome. Input is a genome ID and one or more locations in the form contig_start_stop, output is the DNA sequence in fasta format. | ||
227 : | =cut | ||
228 : | sub dna_sequence { | ||
229 : | my ($genome, @locations)=@_; | ||
230 : | my $fig = new FIG; | ||
231 : | my $seq=$fig->dna_seq($genome,@locations); | ||
232 : | return $seq; | ||
233 : | } | ||
234 : | |||
235 : | |||
236 : | |||
237 : | =begin WSDL | ||
238 : | _IN genomeid $string | ||
239 : | _RETURN $string | ||
240 : | _DOC Get all the protein identifiers associated with a genome. Input is a genome id, output is a list of pegs in that genome | ||
241 : | =cut | ||
242 : | sub pegs_of { | ||
243 : | my ($arg) = @_; | ||
244 : | my $fig = new FIG; | ||
245 : | return (join ",", $fig->pegs_of($arg)); | ||
246 : | } | ||
247 : | |||
248 : | =begin WSDL | ||
249 : | _IN genomeid $string | ||
250 : | _IN contig $string | ||
251 : | _RETURN $string | ||
252 : | _DOC Get the length of the DNA sequence in a contig in a genome. Input is a genome id and a contig name, return is the length of the contig | ||
253 : | =cut | ||
254 : | sub contig_ln { | ||
255 : | my $fig = new FIG; | ||
256 : | return $fig->contig_ln(@_); | ||
257 : | } | ||
258 : | |||
259 : | =begin WSDL | ||
260 : | _IN genomeid $string | ||
261 : | _RETURN $string | ||
262 : | _DOC Test whether an organism is Archaeal. Input is a genome identifier, and output is true or false (or 1 or 0) | ||
263 : | =cut | ||
264 : | sub is_archaeal { | ||
265 : | my ($arg) = @_; | ||
266 : | return stdin_caller("is_archaeal", $arg); | ||
267 : | } | ||
268 : | |||
269 : | =begin WSDL | ||
270 : | _IN genomeid $string | ||
271 : | _RETURN $string | ||
272 : | _DOC Test whether an organism is Bacterial. Input is a genome identifier, and output is true or false (or 1 or 0) | ||
273 : | =cut | ||
274 : | sub is_bacterial { | ||
275 : | my ($arg) = @_; | ||
276 : | return stdin_caller("is_bacterial", $arg); | ||
277 : | } | ||
278 : | |||
279 : | =begin WSDL | ||
280 : | _IN genomeid $string | ||
281 : | _RETURN $string | ||
282 : | _DOC Test whether an organism is Eukaryotic. Input is a genome identifier, and output is true or false (or 1 or 0) | ||
283 : | =cut | ||
284 : | sub is_eukaryotic { | ||
285 : | my ($arg) = @_; | ||
286 : | return stdin_caller("is_eukaryotic", $arg); | ||
287 : | } | ||
288 : | |||
289 : | =begin WSDL | ||
290 : | _IN genomeid $string | ||
291 : | _RETURN $string | ||
292 : | _DOC Test whether an organism is a Prokaryote. Input is a genome identifier, and output is true or false (or 1 or 0) | ||
293 : | =cut | ||
294 : | sub is_prokaryotic { | ||
295 : | my ($arg) = @_; | ||
296 : | return stdin_caller("is_prokaryotic", $arg); | ||
297 : | } | ||
298 : | |||
299 : | =begin WSDL | ||
300 : | _IN peg $string | ||
301 : | _RETURN $string | ||
302 : | _DOC Get the translation (protein sequence) of a peg. Input is a peg, output is the protein sequence | ||
303 : | =cut | ||
304 : | sub translation_of { | ||
305 : | my ($arg) = @_; | ||
306 : | return stdin_caller("translation_of", $arg); | ||
307 : | } | ||
308 : | |||
309 : | =begin WSDL | ||
310 : | _IN peg $string | ||
311 : | _RETURN $string | ||
312 : | _DOC Get the translation (protein sequence) of a peg. Input is a peg, output is translation | ||
313 : | =cut | ||
314 : | sub get_translation { | ||
315 : | my ($arg1) = @_; | ||
316 : | my $fig = new FIG; | ||
317 : | my $result = $fig->get_translation($arg1); | ||
318 : | return $result; | ||
319 : | } | ||
320 : | |||
321 : | =begin WSDL | ||
322 : | _IN peg $string | ||
323 : | _RETURN @string | ||
324 : | _DOC Get the location of a peg on its contig. Input is a peg, output is list of loc on contig | ||
325 : | =cut | ||
326 : | sub feature_location { | ||
327 : | my ($arg1) = @_; | ||
328 : | my $fig = new FIG; | ||
329 : | my @result = ($fig->feature_location($arg1)); | ||
330 : | return @result; | ||
331 : | } | ||
332 : | |||
333 : | =begin WSDL | ||
334 : | _IN peg $string | ||
335 : | _RETURN $string | ||
336 : | _DOC Get the aliases of a peg. These are the identifiers that other databases use. Input is a peg, output is tab separated string of aliases | ||
337 : | =cut | ||
338 : | sub aliases_of { | ||
339 : | my ($arg) = @_; | ||
340 : | return stdin_caller("aliases_of", $arg); | ||
341 : | } | ||
342 : | |||
343 : | =begin WSDL | ||
344 : | _IN peg $string | ||
345 : | _RETURN $string | ||
346 : | _DOC Get the corresponding ids of a peg. These are the identifiers that other databases use. Input is a peg, output is tab separated string of aliases | ||
347 : | =cut | ||
348 : | sub get_corresponding_ids { | ||
349 : | my ($arg) = @_; | ||
350 : | my $fig = new FIG; | ||
351 : | my @result = $fig->get_corresponding_ids($arg, 1); | ||
352 : | return join("\t", map {join(":", $_->[1], $_->[0])} @result); | ||
353 : | } | ||
354 : | |||
355 : | =begin WSDL | ||
356 : | _IN alias $string | ||
357 : | _RETURN $string | ||
358 : | _DOC Get the FIG ID (peg) for a given external identifier. Input is an identifier used by another database, output is our identifier | ||
359 : | =cut | ||
360 : | sub alias2fig { | ||
361 : | my ($arg) = @_; | ||
362 : | return stdin_caller("alias2fig", $arg); | ||
363 : | } | ||
364 : | |||
365 : | |||
366 : | |||
367 : | =begin WSDL | ||
368 : | _IN EC_number $string ec code | ||
369 : | _RETURN $string ec name | ||
370 : | _DOC Get the name for a given E.C. number. Input is an EC number, output is the name | ||
371 : | =cut | ||
372 : | sub ec_name { | ||
373 : | my $fig = new FIG; | ||
374 : | my $result = $fig->ec_name(@_); | ||
375 : | return $result; | ||
376 : | } | ||
377 : | |||
378 : | |||
379 : | =begin WSDL | ||
380 : | _IN reaction_number $string reaction code number | ||
381 : | _IN genomeid $string | ||
382 : | _RETURN $string | ||
383 : | _DOC Get a tab-separated list of [subsystem name, functional role, peg, subsystem variant code for that genome] for any given reaction id and genome id. Maps the reaction id to peg, peg to genome, and genome to variant code | ||
384 : | =cut | ||
385 : | sub reaction_to_role { | ||
386 : | my ($rxn, $genomeid) = @_; | ||
387 : | my $fig = new FIG; | ||
388 : | my @ecs = $fig->catalyzed_by($rxn); | ||
389 : | |||
390 : | my @return; | ||
391 : | foreach my $ec (@ecs) | ||
392 : | { | ||
393 : | my @ssr = grep {$fig->genome_of($_->[2]) eq $genomeid} $fig->subsystems_for_ec($ec); | ||
394 : | foreach my $ss (@ssr) | ||
395 : | { | ||
396 : | my $sub = $fig->get_subsystem($ss->[0]); | ||
397 : | push @$ss, $sub->get_variant_code_for_genome($fig->genome_of($ss->[2])) if ($sub); | ||
398 : | push @return, join("\t", @$ss); | ||
399 : | } | ||
400 : | } | ||
401 : | return SOAP::Data->type('string')->name('reaction_to_roleReturn')->value(join("\n", @return)); | ||
402 : | } | ||
403 : | |||
404 : | |||
405 : | |||
406 : | =begin WSDL | ||
407 : | _IN peg $string | ||
408 : | _RETURN $string | ||
409 : | _DOC Get the pegs that are coupled to any given peg. Input is a peg, output is list of [protein, score] for things that are coupled to this peg | ||
410 : | =cut | ||
411 : | sub coupled_to { | ||
412 : | my $fig = new FIG; | ||
413 : | my $return=undef; | ||
414 : | my @result = $fig->coupled_to(@_); | ||
415 : | if (@result) | ||
416 : | { | ||
417 : | $return = join("\n", map {$_->[0].",".$_->[1]} @result); | ||
418 : | } | ||
419 : | return $return; | ||
420 : | } | ||
421 : | |||
422 : | =begin WSDL | ||
423 : | _IN peg $string | ||
424 : | _RETURN $string | ||
425 : | _DOC Get the pegs that may be coupled to this peg through abstract coupling. Input is a peg, output is list of [protein, score] for things that are coupled to this peg | ||
426 : | =cut | ||
427 : | sub abstract_coupled_to { | ||
428 : | my $fig = new FIG; | ||
429 : | my $return=undef; | ||
430 : | my @result = $fig->abstract_coupled_to(@_); | ||
431 : | if (@result) | ||
432 : | { | ||
433 : | $return = join("\n", map {$_->[0].",".$_->[1]} @result); | ||
434 : | } | ||
435 : | return $return; | ||
436 : | } | ||
437 : | |||
438 : | =begin WSDL | ||
439 : | _IN peg_id $string | ||
440 : | _IN n_pch_pins $string | ||
441 : | _IN n_sims $string | ||
442 : | _IN sim_cutoff $string | ||
443 : | _IN color_sim_cutoff $string | ||
444 : | _IN sort_by $string | ||
445 : | _RETURN $string | ||
446 : | _DOC Input is a FIG (PEG) ID and ..., output is the pinned regions data | ||
447 : | =cut | ||
448 : | sub pinned_region_data { | ||
449 : | my ($peg, $n_pch_pins, $n_sims, $sim_cutoff, $color_sim_cutoff, $sort_by, $fast_color, $sims_from, $region_size) = @_; | ||
450 : | |||
451 : | my $fig = new FIG; | ||
452 : | |||
453 : | defined($n_pch_pins) or $n_pch_pins = 5; | ||
454 : | defined($n_sims) or $n_sims = 0; | ||
455 : | defined($sim_cutoff) or $sim_cutoff = 1e-20; | ||
456 : | defined($color_sim_cutoff) or $color_sim_cutoff = 1e-20; | ||
457 : | defined($sort_by) or $sort_by = ''; | ||
458 : | |||
459 : | defined($fast_color) or $fast_color = 0; | ||
460 : | defined($sims_from) or $sims_from = 'blast'; | ||
461 : | defined($region_size) or $region_size = 16000; | ||
462 : | |||
463 : | my $pin_desc = { | ||
464 : | 'pegs' => [$peg], | ||
465 : | 'collapse_close_genomes' => 0, | ||
466 : | 'n_pch_pins' => $n_pch_pins, | ||
467 : | 'n_sims' => $n_sims, | ||
468 : | 'show_genomes' => '', | ||
469 : | 'sim_cutoff' => $sim_cutoff, | ||
470 : | 'color_sim_cutoff' => $color_sim_cutoff, | ||
471 : | 'sort_by' => $sort_by, | ||
472 : | 'show_genomes' => [], | ||
473 : | }; | ||
474 : | |||
475 : | my $maps = &PinnedRegions::pinned_regions($fig, $pin_desc, $fast_color, $sims_from, $region_size); | ||
476 : | my $txt = Dumper($maps); | ||
477 : | return $txt; | ||
478 : | } | ||
479 : | |||
480 : | |||
481 : | =begin WSDL | ||
482 : | _IN complete $string | ||
483 : | _IN restrictions $string | ||
484 : | _IN domain $string | ||
485 : | _RETURN @string | ||
486 : | _DOC Get a set of genomes. The inputs are a series of constraints - whether the sequence is complete, other restrictions, and a domain of life (Bacteria, Archaea, Eukarya, Viral, Environmental Genome). Output is a comma separated list of genomes | ||
487 : | =cut | ||
488 : | sub all_genomes { | ||
489 : | my $fig=new FIG; | ||
490 : | my @genomes=$fig->genomes(@_); | ||
491 : | return \@genomes; | ||
492 : | } | ||
493 : | |||
494 : | =begin WSDL | ||
495 : | _IN complete $string | ||
496 : | _IN restrictions $string | ||
497 : | _IN domain $string | ||
498 : | _RETURN @string | ||
499 : | _DOC Get a set of genomes. The inputs are a series of constraints - whether the sequence is complete, other restrictions, and a domain of life (Bacteria, Archaea, Eukarya, Viral, Environmental Genome). Output is a comma separated list of genomes. | ||
500 : | =cut | ||
501 : | sub genomes { | ||
502 : | my $fig = new FIG; | ||
503 : | my @result = $fig->genomes(@_); | ||
504 : | my %genomes = map {($_ => $fig->genus_species($_))} $fig->genomes(@_); | ||
505 : | |||
506 : | return \%genomes; | ||
507 : | } | ||
508 : | |||
509 : | |||
510 : | =begin WSDL | ||
511 : | _IN pattern1 $string | ||
512 : | _IN pattern2 $string | ||
513 : | _RETURN @string | ||
514 : | _DOC Search and grep through the database. Input is two patterns, first one is used in search_index, second used to grep the results to restrict to a smaller set. | ||
515 : | =cut | ||
516 : | sub search_and_grep { | ||
517 : | my ($arg1, $arg2) = @_; | ||
518 : | |||
519 : | my $fig = new FIG; | ||
520 : | |||
521 : | my ($pegs, $roles) = $fig->search_index($arg1); | ||
522 : | |||
523 : | my (@result_list, $entry); | ||
524 : | |||
525 : | for $entry (@$pegs) { | ||
526 : | push (@result_list, grep(/$arg2/, @$entry)); | ||
527 : | } | ||
528 : | push (@result_list, grep(/$arg2/, @$roles)); | ||
529 : | chomp @result_list; | ||
530 : | my $return_value = join ("\n", @result_list); | ||
531 : | return $return_value; | ||
532 : | } | ||
533 : | |||
534 : | |||
535 : | |||
536 : | sub search_genome { | ||
537 : | my ($genome, $term)=@_; | ||
538 : | my $fig = new FIG; | ||
539 : | my ($pegs, $roles) = $fig->search_index($term); | ||
540 : | my $result; | ||
541 : | # For each feature, there is a tuple consisting of the (0) feature ID, (1) the organism name (genus and species), (2) the aliases, (3) the functional role, and (4) the relevant annotator | ||
542 : | |||
543 : | foreach my $ent (grep {$_->[0] =~ /fig\|$genome\./} @$pegs) { | ||
544 : | $result->{$ent->[0]}=$ent->[3]; | ||
545 : | } | ||
546 : | return $result; | ||
547 : | } | ||
548 : | |||
549 : | =begin WSDL | ||
550 : | _IN pattern $string | ||
551 : | _RETURN @string | ||
552 : | _DOC Search the database. Input is a pattern to search for, output is tab separated list of pegs and roles | ||
553 : | =cut | ||
554 : | sub simple_search { | ||
555 : | my ($arg1)=@_; | ||
556 : | |||
557 : | my $fig = new FIG; | ||
558 : | |||
559 : | my ($pegs, $roles) = $fig->search_index($arg1); | ||
560 : | |||
561 : | my (@result_list, $entry); | ||
562 : | |||
563 : | for $entry (@$pegs) { | ||
564 : | push (@result_list, (join("\t", @$entry))); | ||
565 : | } | ||
566 : | |||
567 : | # push (@result_list, (join("\t", @$roles))); | ||
568 : | chomp @result_list; | ||
569 : | my $return_value = join ("\n", @result_list); | ||
570 : | return $return_value; | ||
571 : | } | ||
572 : | |||
573 : | |||
574 : | =begin all_subsystem_classifications | ||
575 : | |||
576 : | Get all the subsystems classifications. Tested with JSON. | ||
577 : | |||
578 : | =cut | ||
579 : | sub all_subsystem_classifications { | ||
580 : | my $fig = new FIG; | ||
581 : | my $output; | ||
582 : | |||
583 : | my %found; | ||
584 : | map { | ||
585 : | my @classification=@{$fig->subsystem_classification($_)}; | ||
586 : | $#classification=1; | ||
587 : | push @classification, $_; | ||
588 : | push @$output, \@classification; | ||
589 : | } sort {$a cmp $b} ($fig->all_subsystems()); | ||
590 : | return $output; | ||
591 : | } | ||
592 : | |||
593 : | =begin all_subsystems_with_roles | ||
594 : | |||
595 : | Get all the subsystems and their roles. Tested with JSON | ||
596 : | |||
597 : | =cut | ||
598 : | sub all_subsystems_with_roles { | ||
599 : | my $fig = new FIG; | ||
600 : | my $output = $fig->all_subsystems_with_roles(); | ||
601 : | return $output; | ||
602 : | } | ||
603 : | |||
604 : | |||
605 : | =begin functions_to_subsystems | ||
606 : | |||
607 : | Get the subsystems for several functions. Tested with JSON | ||
608 : | |||
609 : | =cut | ||
610 : | sub functions_to_subsystems { | ||
611 : | my $fig=new FIG; | ||
612 : | my %fns; | ||
613 : | foreach my $fn (@_) { | ||
614 : | $fn = uri_unescape($fn); | ||
615 : | my @arr = $fig->function_to_subsystems($fn); | ||
616 : | $fns{$fn} = \@arr; | ||
617 : | } | ||
618 : | return \%fns; | ||
619 : | } | ||
620 : | |||
621 : | =begin function_to_subsystems | ||
622 : | |||
623 : | Get the subsystems for a function. Tested with JSON | ||
624 : | |||
625 : | =cut | ||
626 : | sub function_to_subsystems { | ||
627 : | my $fn = shift; | ||
628 : | $fn =~ s/\%20/ /g; | ||
629 : | my $fig=new FIG; | ||
630 : | my @arr = $fig->function_to_subsystems($fn); | ||
631 : | return \@arr; | ||
632 : | } | ||
633 : | |||
634 : | |||
635 : | =begin WSDL | ||
636 : | _RETURN $string list of families | ||
637 : | _DOC Get all the FIG protein families (FIGfams). No input needed, it just returns a list of all families | ||
638 : | =cut | ||
639 : | sub all_families { | ||
640 : | my $fig = new FIG; | ||
641 : | my $figfams = new FigFams($fig); | ||
642 : | |||
643 : | my @out = $figfams->all_families; | ||
644 : | print STDERR Dumper(@out); | ||
645 : | return @out; | ||
646 : | } | ||
647 : | |||
648 : | =begin WSDL | ||
649 : | _RETURN $string list of families and funcs | ||
650 : | _DOC Get all the FIG protein families (FIGfams) with their assigned functions. No input needed, it just returns a list of all the families and their functions. | ||
651 : | =cut | ||
652 : | sub all_families_with_funcs { | ||
653 : | my $fig = new FIG; | ||
654 : | my $figfams = new FigFams($fig); | ||
655 : | |||
656 : | my @out =$figfams->all_families_with_funcs; | ||
657 : | return @out; | ||
658 : | } | ||
659 : | |||
660 : | =begin WSDL | ||
661 : | _IN families $string list of famids | ||
662 : | _RETURN $string 2 col table, famid, peg | ||
663 : | _DOC Get all the pegs in some FIGfams. The input is a tab-separated list of family IDs, and the output is a two column table of [family id, peg] | ||
664 : | =cut | ||
665 : | sub list_members { | ||
666 : | my ($famids) = @_; | ||
667 : | my $fig = new FIG; | ||
668 : | my $figfams = new FigFams($fig); | ||
669 : | my @in = split(/\t/, $famids); | ||
670 : | warn("Starting 2 list members $famids\n"); | ||
671 : | my @out = (); | ||
672 : | foreach my $famid (@in) | ||
673 : | { | ||
674 : | my $famO = new FigFam($fig,$famid); | ||
675 : | foreach my $peg ($famO->list_members) | ||
676 : | { | ||
677 : | push(@out,[$famid,$peg]); | ||
678 : | } | ||
679 : | } | ||
680 : | return @out; | ||
681 : | } | ||
682 : | |||
683 : | =begin WSDL | ||
684 : | _IN families $string list of pegs | ||
685 : | _RETURN $string returns a 3-column table [PEG,Function,AliasesCommaSeparated] | ||
686 : | _DOC Get all the pegs in some FIGfams, their functions, and aliases. Input is a tab-separated list of families, returns a 3-column comma separated table [PEG, Function, Aliases] | ||
687 : | =cut | ||
688 : | sub CDS_data { | ||
689 : | my ($pegs) = @_; | ||
690 : | my $fig = new FIG; | ||
691 : | my $figfams = new FigFams($fig); | ||
692 : | my @in = split(/\t/, $pegs); | ||
693 : | |||
694 : | #warn("Starting CDS data $pegs\n"); | ||
695 : | #print STDERR &Dumper($pegs); | ||
696 : | |||
697 : | my @out = (); | ||
698 : | foreach my $peg (@in) | ||
699 : | { | ||
700 : | my @famids = $figfams->families_containing_peg($peg); | ||
701 : | foreach my $famid (@famids) | ||
702 : | { | ||
703 : | push(@out,[$peg,scalar $fig->function_of($peg),[$fig->feature_aliases($peg)]]); | ||
704 : | } | ||
705 : | } | ||
706 : | return @out; | ||
707 : | } | ||
708 : | |||
709 : | =begin WSDL | ||
710 : | _IN families $string list of pegs | ||
711 : | _RETURN $string a 2-column table [PEG,Sequence] | ||
712 : | _DOC Get the protein sequences for a list of FIGfams. Input is a tab-separated list of families, returns a 2-column comma separated table of [PEG, sequence] | ||
713 : | =cut | ||
714 : | sub CDS_sequences { | ||
715 : | my ($pegs) = @_; | ||
716 : | my $fig = new FIG; | ||
717 : | my $figfams = new FigFams($fig); | ||
718 : | my @in = split(/\t/, $pegs); | ||
719 : | |||
720 : | #warn("Starting CDS seq $pegs\n"); | ||
721 : | #print STDERR &Dumper($pegs); | ||
722 : | my @out = (); | ||
723 : | foreach my $peg (@in) | ||
724 : | { | ||
725 : | push(@out,[$peg,$fig->get_translation($peg)]); | ||
726 : | } | ||
727 : | return @out; | ||
728 : | } | ||
729 : | |||
730 : | =begin WSDL | ||
731 : | _IN sequences $string list of id seq pairs | ||
732 : | _RETURN $string returns a 2-column table [Id,FamilyID] | ||
733 : | _DOC Tries to put a protein sequence in a family. Input is a tab-separated id and sequence, delimited by new lines. The output is a comma-separated 2-column table [your sequence id, FamilyID] if the sequence is placed in a family. | ||
734 : | =cut | ||
735 : | sub is_member_of { | ||
736 : | my ($id_seqs) = @_; | ||
737 : | my $fig = new FIG; | ||
738 : | my $figfams = new FigFams($fig); | ||
739 : | #warn("Doing is member $id_seqs\n"); | ||
740 : | #print STDERR &Dumper($id_seqs); | ||
741 : | |||
742 : | my @in = split(/\n/, $id_seqs); | ||
743 : | my @out = (); | ||
744 : | foreach my $pair (@in) | ||
745 : | { | ||
746 : | my($id,$seq) = split(/\t/, $pair); | ||
747 : | my($famO,undef) = $figfams->place_in_family($seq); | ||
748 : | if ($famO) | ||
749 : | { | ||
750 : | push(@out,[$id,$famO->family_id]); | ||
751 : | } | ||
752 : | } | ||
753 : | return @out; | ||
754 : | } | ||
755 : | |||
756 : | =begin WSDL | ||
757 : | _IN peg $string | ||
758 : | _IN maxN $string | ||
759 : | _IN maxP $string | ||
760 : | _RETURN $string | ||
761 : | _DOC Retrieve the sims (precomputed BLAST hits) for a given protein sequence. Input is a peg, an optional maximum number of hits (default=50), and an optional maximum E value (default=1e-5). The output is a list of sims in modified tab separated (-m 8) format. Additional columns include length of query and database sequences, and method used. | ||
762 : | =cut | ||
763 : | sub sims { | ||
764 : | my ($peg, $maxN, $maxP)=@_; | ||
765 : | unless (defined $maxN) {$maxN=50} | ||
766 : | unless (defined $maxP) {$maxP=1e-5} | ||
767 : | my $fig=new FIG; | ||
768 : | my $return=undef; | ||
769 : | foreach my $sim ($fig->sims($peg, $maxN, $maxP, 'figx')) | ||
770 : | { | ||
771 : | $return .= join("\t", @$sim). "\n"; | ||
772 : | } | ||
773 : | |||
774 : | return $return; | ||
775 : | } | ||
776 : | |||
777 : | |||
778 : | |||
779 : | ##### INTERNAL METHODS | ||
780 : | |||
781 : | sub stdin_caller { | ||
782 : | my ($name, $arg) = @_; | ||
783 : | my($rd, $wr, $err, $pid, $std_err, $return_value, @std_out); | ||
784 : | if (!($pid = open3($wr, $rd, $err, "$FIG_Config::bin/$name"))) | ||
785 : | { | ||
786 : | die "Cannot run open3 $name: $!"; | ||
787 : | } | ||
788 : | |||
789 : | $wr->write($arg); | ||
790 : | close($wr); | ||
791 : | |||
792 : | @std_out= <$rd>; | ||
793 : | close($rd); | ||
794 : | waitpid $pid, 0; | ||
795 : | $return_value = join ("", @std_out); | ||
796 : | return $return_value; | ||
797 : | } | ||
798 : | |||
799 : |
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |