[Bio] / FigWebServices / figfam_server_2.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/figfam_server_2.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.23 - (view) (download)

1 : olson 1.1 use strict;
2 :     use FIG;
3 : olson 1.7
4 : parrello 1.21 =head1 FIGfam Server
5 :    
6 :     This file contains the functions and used by the FIGfam Server. This server is
7 :     used to access data in the FIGfam database-- a large, complex directory tree of
8 :     structured files independent of the L<Sapling> database. It contains some of the
9 :     data found in Sapling, including genome names, functional assignments, and
10 :     aliases, but is not necessarily as complete or as up-to-date as the real
11 :     database. As a result, methods of this server that perform functions similar to
12 :     those of the Sapling Server (see L<SAP>) may return different results.
13 :    
14 :     For documentation of this server's functions, see L<FFserver>.
15 :    
16 :     =cut
17 :    
18 : olson 1.7 my $have_fcgi;
19 :     eval {
20 :     require CGI::Fast;
21 :     $have_fcgi = 1;
22 :     };
23 :    
24 : olson 1.1 use Data::Dumper;
25 :     use FFs;
26 :     use FF;
27 : olson 1.22 use KmersOld;
28 : olson 1.1 use FIG_Config;
29 :    
30 :     use YAML;
31 :    
32 :     my $ffdir = $FIG_Config::FigfamsData;
33 : olson 1.3
34 : olson 1.22 my $kmer_dir = $FIG_Config::KmerDataOld;
35 : olson 1.1
36 : olson 1.19 my $rna_tool = "/vol/search_for_rnas-2007-0625/search_for_rnas";
37 :    
38 : olson 1.1 my $fig = new FIG;
39 :     my $ffs = new FFs($ffdir);
40 :    
41 : olson 1.7
42 : olson 1.11 if ($kmer_dir eq '')
43 :     {
44 :     die "Kmer directory not specified";
45 :     }
46 :     elsif (! -d $kmer_dir)
47 : olson 1.7 {
48 : olson 1.11 die "Kmer directory $kmer_dir does not exist";
49 : olson 1.7 }
50 : olson 1.11
51 :     my $kmers = Kmers->new_using_C($kmer_dir);
52 :    
53 : olson 1.1
54 :     $| = 1;
55 : olson 1.11
56 : olson 1.1 my $clean_up = 0;
57 :    
58 :     my $header = "Content-type: text/plain\n\n";
59 :    
60 : olson 1.23 my $max_requests = 50;
61 : olson 1.10
62 : olson 1.7 #
63 :     # If no CGI vars, assume we are invoked as a fastcgi service.
64 :     #
65 : olson 1.10 my $n_requests = 0;
66 : olson 1.7 if ($have_fcgi && $ENV{REQUEST_METHOD} eq '')
67 : olson 1.1 {
68 : olson 1.13 #
69 :     # Make mysql autoreconnect.
70 :     #
71 :     if ($FIG_Config::dbms eq 'mysql')
72 :     {
73 :     my $dbh = $fig->db_handle()->{_dbh};
74 :     $dbh->{mysql_auto_reconnect} = 1;
75 :     }
76 :    
77 :     while ((my $cgi = new CGI::Fast()) &&
78 : olson 1.10 ($max_requests == 0 || $n_requests++ < $max_requests))
79 : olson 1.7 {
80 :     eval {
81 :     &process_request($cgi);
82 :     };
83 :     if ($@)
84 :     {
85 :     if (ref($@) ne 'ARRAY')
86 :     {
87 : olson 1.13 warn "code died, cgi=$cgi returning error\n";
88 : olson 1.7 print $cgi->header(-status => '500 error in body of cgi processing');
89 :     print $@;
90 :     }
91 :     }
92 :     endloop:
93 :     }
94 :     }
95 :     else
96 :     {
97 :     my $cgi = new CGI();
98 :     &process_request($cgi);
99 :     }
100 :    
101 :     exit;
102 :    
103 :    
104 :     sub process_request
105 :     {
106 :     my($cgi) = @_;
107 :    
108 : olson 1.1 my $function = $cgi->param('function');
109 : olson 1.10 # print STDERR "got function=$function\n";
110 : olson 1.1
111 : olson 1.2 my $arg_str = $cgi->param('args');
112 :     my @args;
113 :     if ($arg_str)
114 :     {
115 :     eval {
116 :     @args = YAML::Load($arg_str);
117 :     };
118 :     if ($@)
119 :     {
120 :     myerror($cgi, "500 bad YAML parse", "YAML parse failed");
121 :     next;
122 :     }
123 :     }
124 :    
125 : disz 1.4 $function or myerror($cgi, "500 missing argument", "missing function argument");
126 : olson 1.1
127 :     #print STDERR "$function\n";
128 :     if ($function eq "members_of_families") {
129 :     print $cgi->header();
130 : olson 1.2
131 :     foreach my $famid (@args) {
132 : olson 1.1 my $fam;
133 :     eval {$fam = new FF($famid, $ffs->{dir}); };
134 :     if ($fam) {
135 :     print YAML::Dump([$famid, $fam->family_function, [$fam->list_members()]]);
136 :     }
137 :     else
138 :     {
139 :     print YAML::Dump(undef);
140 :     }
141 :     }
142 : olson 1.6 }
143 :     elsif ($function eq "families_containing_peg") {
144 :     print $cgi->header();
145 :    
146 :     foreach my $fid (@args) {
147 :     my @fams;
148 :     eval { @fams = $ffs->families_containing_peg($fid); };
149 :     print YAML::Dump([$fid, \@fams]);
150 :     }
151 : arodri7 1.8 } elsif ($function eq "function_of") {
152 :     print $cgi->header();
153 :    
154 :     foreach my $fid (@args) {
155 :     my $func;
156 :     eval { $func = $ffs->function_of($fid); };
157 :     print YAML::Dump([$fid, $func]);
158 :     }
159 :     } elsif ($function eq "org_of") {
160 :     print $cgi->header();
161 :    
162 :     foreach my $fid (@args) {
163 :     my $org;
164 :     eval { $org = $ffs->org_of($fid); };
165 :     print YAML::Dump([$fid, $org]);
166 :     }
167 :     } elsif ($function eq "seq_of") {
168 :     print $cgi->header();
169 :    
170 :     foreach my $fid (@args) {
171 :     my $seq;
172 :     eval { $seq = $ffs->seq_of($fid); };
173 :     print YAML::Dump([$fid, $seq]);
174 :     }
175 :     } elsif ($function eq "aliases_of") {
176 :     print $cgi->header();
177 :    
178 :     foreach my $fid (@args) {
179 :     my $aliases;
180 :     eval { $aliases = $ffs->aliases_of($fid); };
181 :     print YAML::Dump([$fid, $aliases]);
182 :     }
183 :     } elsif ($function eq "families_implementing_role") {
184 :     print $cgi->header();
185 :    
186 :     foreach my $role (@args) {
187 :     my @fams;
188 :     eval { @fams = $ffs->families_implementing_role($role); };
189 :     print YAML::Dump([$role, \@fams]);
190 :     }
191 :     } elsif ($function eq "families_with_function") {
192 :     print $cgi->header();
193 :    
194 :     foreach my $function (@args) {
195 :     my @fams;
196 :     eval { @fams = $ffs->families_with_function($function); };
197 :     print YAML::Dump([$function, \@fams]);
198 :     }
199 :     } elsif ($function eq "families_in_genome") {
200 :     print $cgi->header();
201 :    
202 :     foreach my $genome (@args) {
203 :     my @fams;
204 :     eval { @fams = $ffs->families_in_genome($genome); };
205 :     print YAML::Dump([$genome, \@fams]);
206 :     }
207 :     } elsif ($function eq "get_subsystem_based_figfams") {
208 :     print $cgi->header();
209 :     print YAML::Dump($ffs->get_subsystem_based_figfams());
210 : olson 1.1 } elsif ($function eq "should_be_member") {
211 :    
212 :     print $cgi->header();
213 : olson 1.2
214 :     foreach my $parm (@args) {
215 :     my ($famid, $seq) = @$parm;
216 : olson 1.1 my $fam = new FF($famid, $ffs->{dir});
217 : olson 1.2 my $res;
218 : olson 1.1 if ($fam) {
219 :     my ($bool, $sims) = $fam->should_be_member($seq), "\n";
220 : olson 1.2 $res = $bool ? 1 : 0;
221 : olson 1.1 }
222 : olson 1.2 print YAML::Dump($res);
223 : olson 1.1 }
224 :     } elsif ($function eq "all_families") {
225 :     print $cgi->header();
226 : olson 1.2 print YAML::Dump($ffs->all_families(1));
227 : olson 1.1 } elsif ($function eq "assign_function_to_prot") {
228 :     print $cgi->header();
229 :     my @id = $cgi->param('id_seq');
230 : olson 1.9
231 :     my $blast = $cgi->param('blast');
232 :     my $min_hits = $cgi->param('min_hits');
233 : olson 1.12 my $assign_to_all = $cgi->param('assign_to_all');
234 :    
235 : olson 1.20 @id or myerror($cgi, "500 missing id_seq", "figfam server missing id_seq argument");
236 :    
237 : olson 1.12 my $extra_file;
238 : olson 1.20 #
239 :     # If we are invoking the blast-based assignments, use
240 :     # Kmers::assign_functions_to_prot_set to accelerate
241 :     # blasting. Otherwise, invoke Kmers::assign_function_to_prot
242 :     # on each input sequence so we use less memory.
243 :     #
244 : olson 1.12 if ($assign_to_all)
245 :     {
246 :     my $f = "$FIG_Config::KmerData/extra_prok_seqs.fasta";
247 :     if (-f $f && -f "$f.phr")
248 :     {
249 :     $extra_file = $f;
250 :     }
251 :     }
252 : olson 1.20 if ($extra_file)
253 :     {
254 :     #
255 :     # Reformat input to match expected layout.
256 :     #
257 :     my @inp = map { my($id, $seq) = split(/,/, $_); [$id, undef, $seq] } @id;
258 :     my @res = $kmers->assign_functions_to_prot_set(\@inp, $blast, $min_hits, $extra_file);
259 :     for my $ent (@res)
260 :     {
261 :     my($id, @rest) = @$ent;
262 :     print YAML::Dump([$id, \@rest]);
263 :     }
264 :     }
265 :     else
266 :     {
267 :     foreach my $parm (@id) {
268 :     my ($id, $seq) = split /,/, $parm;
269 :     my $res = $kmers->assign_function_to_prot($seq, $blast, $min_hits, $extra_file);
270 :     print YAML::Dump([$id,$res]);
271 :     }
272 : olson 1.1 }
273 : olson 1.9
274 : olson 1.14 } elsif ($function eq "call_genes") {
275 :     my @id = $cgi->param('id_seq');
276 :    
277 :     my $genetic_code = $cgi->param('genetic_code');
278 :     if ($genetic_code =~ /^(\d+)$/)
279 :     {
280 :     $genetic_code = $1;
281 :     }
282 :     else
283 :     {
284 :     $genetic_code = 11;
285 :     }
286 :    
287 :     @id or myerror($cgi, "500 missing id_seq", "figfam server missing id_seq argument");
288 :    
289 :     #
290 :     # Create fasta of the contig data.
291 :     #
292 :    
293 :     my $fh;
294 :     my $tmp = "$FIG_Config::temp/contigs.$$";
295 :     my $tmp2 = "$FIG_Config::temp/contigs.aa.$$";
296 :     my $tbl = "$FIG_Config::temp/tbl.$$";
297 :     my $tbl2 = "$FIG_Config::temp/tbl2.$$";
298 :     open($fh, ">", $tmp);
299 :    
300 :     foreach my $parm (@id) {
301 :     my ($id, $seq) = split /,/, $parm;
302 :     &FIG::display_id_and_seq($id, \$seq, $fh);
303 :     }
304 :     close($fh);
305 :    
306 :     my $res = system("$FIG_Config::bin/run_glimmer3 -code=$genetic_code 1.1 $tmp > $tbl");
307 :     if ($res != 0)
308 :     {
309 :     myerror($cgi, "500 glimmer run failed");
310 :     }
311 :    
312 :     my $fh2;
313 :     open($fh, "<", $tbl);
314 :     open($fh2, ">", $tbl2);
315 :     my $ctr = 1;
316 : olson 1.16 my $encoded_tbl = [];
317 : olson 1.14 while (<$fh>)
318 :     {
319 :     chomp;
320 :     my(@a) = split(/\t/);
321 :     $a[0] = sprintf("prot_%05d", $ctr++);
322 : olson 1.15 push(@a, $a[1]);
323 : olson 1.14 print $fh2 join("\t", @a), "\n";
324 : olson 1.16 my ($contig, $beg, $end) = ($a[1] =~ /^(\S+)_(\d+)_(\d+)$/);
325 :     push @$encoded_tbl, [$a[0], $contig, $beg, $end];
326 : olson 1.14 }
327 :     close($fh);
328 :     close($fh2);
329 :    
330 :     $res = system("$FIG_Config::bin/get_fasta_for_tbl_entries -code=$genetic_code $tmp < $tbl2 > $tmp2");
331 :     if ($res != 0)
332 :     {
333 :     myerror($cgi, "500 get_fasta_for_tbl_entries failed");
334 :     }
335 :    
336 : olson 1.15 if (!open($fh,"<", $tmp2))
337 :     {
338 :     myerror($cgi, "Cannot open output file $tmp2");
339 :     }
340 : olson 1.14 print $cgi->header();
341 : olson 1.16 my $out;
342 : olson 1.15 my $buf;
343 :     while (read($fh, $buf, 4096))
344 :     {
345 : olson 1.16 $out .= $buf;
346 : olson 1.15 }
347 :     close($fh);
348 : olson 1.18 print YAML::Dump([$out, $encoded_tbl]);
349 : olson 1.14 #unlink($tmp);
350 :     #unlink($tmp2);
351 :     #unlink($tbl);
352 :     #unlink($tbl2);
353 :    
354 : olson 1.19 } elsif ($function eq "find_rnas") {
355 :     my @id = $cgi->param('id_seq');
356 :    
357 :     my $genus = get_string_param($cgi, 'genus');
358 :     my $species = get_string_param($cgi, 'species');
359 :     my $domain = get_string_param($cgi, 'domain');
360 :    
361 :     @id or myerror($cgi, "500 missing id_seq", "figfam server missing id_seq argument");
362 :    
363 :     $genus or myerror($cgi, "500 missing genus", "figfam server missing genus argument");
364 :     $species or myerror($cgi, "500 missing species", "figfam server missing species argument");
365 :     $domain or myerror($cgi, "500 missing domain", "figfam server missing domain argument");
366 :    
367 :     #
368 :     # Create fasta of the contig data.
369 :     #
370 :    
371 :     my $fh;
372 :     my $tmp_dir = "$FIG_Config::temp/find_rnas.$$";
373 :     my $log = "$tmp_dir/log";
374 :     &FIG::verify_dir($tmp_dir);
375 :     my $tmp = "$tmp_dir/contigs";
376 :     my $tmp2 = "$tmp_dir/contigs2";
377 :     my $tbl = "$tmp_dir/tbl";
378 :     my $tbl2 = "$tmp_dir/tbl2";
379 :    
380 :     open($fh, ">", $tmp);
381 :    
382 :     foreach my $parm (@id) {
383 :     my ($id, $seq) = split /,/, $parm;
384 :     &FIG::display_id_and_seq($id, \$seq, $fh);
385 :     }
386 :     close($fh);
387 :    
388 :     my $cmd = "$rna_tool --tmpdir=$tmp_dir --contigs=$tmp --orgid=1 --domain=$domain --genus=$genus --species=$species";
389 :     my $res = system("$cmd > $tbl 2> $log");
390 :     if ($res != 0)
391 :     {
392 :     myerror($cgi, "500 $rna_tool run failed");
393 :     }
394 :    
395 :     my $fh2;
396 :     open($fh, "<", $tbl);
397 :     open($fh2, ">", $tbl2);
398 :     my $ctr = 1;
399 :     my $encoded_tbl = [];
400 :     while (<$fh>)
401 :     {
402 :     chomp;
403 :     my(@a) = split(/\t/);
404 :     $a[0] = sprintf("rna_%05d", $ctr++);
405 :     push(@a, $a[1]);
406 :     print $fh2 join("\t", @a), "\n";
407 :     my ($contig, $beg, $end) = ($a[1] =~ /^(\S+)_(\d+)_(\d+)$/);
408 :     push @$encoded_tbl, [$a[0], $contig, $beg, $end];
409 :     }
410 :     close($fh);
411 :     close($fh2);
412 :    
413 :     $res = system("$FIG_Config::bin/get_dna $tmp < $tbl2 > $tmp2");
414 :     if ($res != 0)
415 :     {
416 : olson 1.20 myerror($cgi, "500 get_dna failed");
417 : olson 1.19 }
418 :    
419 :     if (!open($fh,"<", $tmp2))
420 :     {
421 :     myerror($cgi, "Cannot open output file $tmp2");
422 :     }
423 :     print $cgi->header();
424 :     my $out;
425 :     my $buf;
426 :     while (read($fh, $buf, 4096))
427 :     {
428 :     $out .= $buf;
429 :     }
430 :     close($fh);
431 :     print YAML::Dump([$out, $encoded_tbl]);
432 :     #unlink($tmp);
433 :     #unlink($tmp2);
434 :     #unlink($tbl);
435 :     #unlink($tbl2);
436 :    
437 : olson 1.1 } elsif ($function eq "assign_functions_to_DNA") {
438 :     print $cgi->header();
439 :     my @id = $cgi->param('id_seq');
440 : olson 1.5 my $min_hits = $cgi->param('min_hits');
441 :     my $max_gap = $cgi->param('max_gap');
442 : olson 1.9 my $blast = $cgi->param('blast');
443 : olson 1.1 @id or myerror($cgi, "500 missing id_seq", "figfam server missing id_seq argument");
444 : olson 1.11 # open(L, ">>/tmp/log");
445 :     # L->autoflush(1);
446 :     # print L Dumper(\@id);
447 : olson 1.1 foreach my $parm (@id) {
448 :     my ($id, $seq) = split /,/, $parm;
449 : olson 1.11 # print L "try $id\n$seq\n";
450 :     my $res;
451 :     eval {
452 :     # print L Dumper($seq, $min_hits, $max_gap, $blast);
453 :     $res = $kmers->assign_functions_to_PEGs_in_DNA($seq, $min_hits, $max_gap, $blast);
454 :     # print L Dumper($res);
455 :     };
456 :     if ($@)
457 :     {
458 :     myerror($cgi, "500 failure on assign_functions_to_PEGs_in_DNA", $@);
459 :     }
460 :    
461 :     # print L Dumper($res);
462 : olson 1.9 print YAML::Dump(map { [$id, $_ ] } @$res);
463 : olson 1.11 # print L "OK\n";
464 : olson 1.1 }
465 :     } else {
466 : disz 1.4 myerror($cgi, "500 invalid function", "invalid function $function\n");
467 : olson 1.1 }
468 :     }
469 :    
470 :     exit;
471 :    
472 : olson 1.19 sub get_string_param
473 :     {
474 :     my($cgi, $name) = @_;
475 :    
476 :     my $str = $cgi->param($name);
477 :     if ($str =~ /^(\S+)/)
478 :     {
479 :     return $1;
480 :     }
481 :     else
482 :     {
483 :     return undef;
484 :     }
485 :    
486 :     }
487 :    
488 : olson 1.1 #
489 :     #The FIGfam server processes requests of the form:
490 :     #
491 :     # 1. PLACE-IN-FAMILY takes as input a list of protein sequences. It
492 :     # returns a list where each element describes the outcome of
493 :     # trying to place the corresponding input sequence into a
494 :     # FIGfam. Each output can be either
495 :     #
496 :     # COULD-NOT-PLACE-IN-FAMILY
497 :     # or
498 :     # ID FUNCTION
499 :     #
500 :     # where ID is of the form FIGxxxxxx and FUNCTION is the family
501 :     # function.
502 :     #
503 :     # 2. MEMBERS-OF-FAMILIES takes as input a list of FIGfam IDs. The
504 :     # output is a list of functions for those families
505 :     # (INVALID-FAMILY will be returned for IDs that do not correspond
506 :     # to an active family), as well as a list of the IDs in each family.
507 :     #
508 :     # 3. SHOULD-BE-MEMBER takes as input a list of 2-tuples
509 :     #
510 :     # [FIGfam-ID,protein sequence]
511 :     #
512 :     # It returns a list of boolean values indicating whether or not
513 :     # the indicated protein sequence can be placed in the designated
514 :     # family.
515 :     #
516 :     # 4. ALL-FAMILIES returns a list of [FIGfam-ID,function] tuples.
517 :     #
518 :     #
519 :     # 5. ASSIGN-FUNCTION-TO-PROT is similar to PLACE-IN-FAMILY, except
520 :     # that the returned list contains either
521 :     #
522 :     # COULD-NOT-PLACE-IN-FAMILY
523 :     # or
524 :     # ID FUNCTION
525 :     #
526 :     # That is, it does not indicate which FIGfam was used to
527 :     # determine the function. This allows higher-performance
528 :     # alternatives for cases in which multiple FIGfams implement the
529 :     # same function. The algorithm supported utilizes the underlying
530 :     # FIGfams, but characterizes sets that implement the same
531 :     # function and does not support distinguishing which FIGfam
532 :     # is actually the right subgrouping.
533 :     #
534 :     # 6. ASSIGN-FUNCTIONS-TO-DNA takes as input a list of DNA
535 :     # sequences. It returns a list where each element describes
536 :     # a region of DNA that is believed to be part of a gene encoding
537 :     # a protein sequence that would be placed into a FIGfam
538 :     # successfully, if the whole protein sequence could be
539 :     # determined. That is, the returned list will contain entrties
540 :     # of either the form
541 :     #
542 :     # COULD-NOT-PLACE-ANY-REGIONS-IN-FAMILIES
543 :     # or
544 :     # BEGIN1 END1 FUNCTION1 BEGIN2 END2 FUNCTION2 ...
545 :     #
546 :     # where BEGIN and END specify a region (if BEGIN is greater than
547 :     # END, the region described is on the reverse strand) and
548 :     # FUNCTION is the family function of the protein sequence that is
549 :     # believed to be encoded by DNA including the embedded region.
550 :     # Each input sequence can produce an arbitrary number of matched
551 :     # regions, there will be 3 fields for each matched region. Note
552 :     # that the described region may include frameshifts and embedded
553 :     # stop codons. The algorithm seeking meaningful sections of DNA
554 :     # assumes that it may have an incomplete, low-quality sequence
555 :     # (and uses an algorithm that attempts to locate meaningful
556 :     # matches even so).
557 :    
558 :     sub myerror
559 :     {
560 :     my($cgi, $stat, $msg) = @_;
561 :     print $cgi->header(-status => $stat);
562 :     print "$msg\n";
563 :     goto endloop;
564 :     }
565 :    
566 :    
567 :    
568 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3