[Bio] / FigWebServices / index.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/index.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.142 - (view) (download)

1 : olson 1.93 #
2 : golsen 1.141 # Copyright (c) 2003-2011 University of Chicago and Fellowship
3 : olson 1.93 # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : overbeek 1.49 ### start
19 :    
20 : golsen 1.141 # Looks like 2 lines of leftover debug code:
21 :     # use FIGRules;
22 :     # my $file = &FIGRules::GetTempFileName;
23 : overbeek 1.120
24 : efrank 1.1 use FIG;
25 : olson 1.89 use FIG_CGI;
26 : olson 1.137 my $have_fcgi;
27 :     eval {
28 :     require CGI::Fast;
29 :     $have_fcgi = 1;
30 :     };
31 : efrank 1.1
32 : olson 1.137
33 :     use strict;
34 : parrello 1.78 use Tracer;
35 : golsen 1.97 use FIGjs qw( toolTipScript );
36 :     use GenoGraphics qw( render );
37 :     use gjoparseblast qw( next_blast_hsp );
38 : golsen 1.65
39 : efrank 1.92 use URI::Escape; # uri_escape
40 : olson 1.36 use POSIX;
41 : golsen 1.141 use Digest::MD5 qw( md5_hex );
42 : efrank 1.1 use HTML;
43 : golsen 1.65
44 : olson 1.134 my $have_sphinx;
45 : olson 1.139 my $sphinx_search_mode;
46 : olson 1.136 BEGIN {
47 :     eval {
48 :     require Sphinx::Search;
49 : olson 1.139 #
50 :     # Assign this up here where we know the symbol is available.
51 :     #
52 :     $sphinx_search_mode = &Sphinx::Search::SPH_MATCH_EXTENDED();
53 : olson 1.136 $have_sphinx++;
54 : olson 1.140 require SeedSearch;
55 : olson 1.136 };
56 :     }
57 : olson 1.134
58 : olson 1.90 my $this_script = "index.cgi";
59 : efrank 1.1
60 : olson 1.137 our $done;
61 :     sub done
62 :     {
63 :     &$done;
64 :     }
65 : olson 1.54
66 : olson 1.137 if ($have_fcgi && $ENV{REQUEST_METHOD} eq '')
67 : olson 1.54 {
68 :     {
69 : olson 1.137 package AllDone;
70 :     sub new
71 :     {
72 :     my($class) = @_;
73 : olson 1.142 return bless {}, $class;
74 : olson 1.137 }
75 : olson 1.54 }
76 : olson 1.137 $done = sub { die AllDone->new; };
77 :    
78 :     my $max_requests = $FIG_Config::fcgi_max_requests || 50;
79 :     my $n_requests = 0;
80 : olson 1.89
81 : olson 1.137 my $fig = new FIG;
82 : efrank 1.1
83 : olson 1.137 warn "begin loop\n";
84 :     while (($max_requests == 0 || $n_requests++ < $max_requests) &&
85 :     (my $cgi = new CGI::Fast()))
86 :     {
87 :     my $user = $cgi->param('user') || "";
88 :    
89 :     warn "have request\n";
90 :     eval {
91 :     &page_run($fig, $cgi, $user);
92 :     };
93 :     warn "Done\n";
94 :     if ($@)
95 :     {
96 :     if (ref($@) eq 'AllDone')
97 :     {
98 :     next;
99 :     }
100 :     warn "code died, cgi=$cgi returning error\n";
101 :     print $cgi->header(-status => '500 error in body of cgi processing');
102 :     print $@;
103 :     }
104 :     }
105 : overbeek 1.125 }
106 : olson 1.137 else
107 : overbeek 1.125 {
108 : olson 1.137 $done = sub { exit 0; };
109 :     my($fig, $cgi, $user);
110 :    
111 :     eval {
112 :     ($fig, $cgi, $user) = FIG_CGI::init(debug_save => 0,
113 :     debug_load => 0,
114 :     print_params => 0);
115 :     };
116 :    
117 :     if ($@ ne "")
118 : overbeek 1.120 {
119 : olson 1.137 my $err = $@;
120 :    
121 :     my(@html);
122 :    
123 :     push(@html, $cgi->p("Error connecting to SEED database."));
124 :     if ($err =~ /Could not connect to DBI:.*could not connect to server/)
125 :     {
126 :     push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
127 :     }
128 :     else
129 : overbeek 1.120 {
130 : olson 1.137 push(@html, $cgi->pre($err));
131 : overbeek 1.120 }
132 : olson 1.137 &HTML::show_page($cgi, \@html, 1);
133 :     exit;
134 : overbeek 1.120 }
135 : olson 1.137 &page_run($fig, $cgi, $user);
136 : overbeek 1.120 }
137 : olson 1.137 exit 0;
138 : overbeek 1.120
139 : olson 1.137 sub page_run
140 : overbeek 1.128 {
141 : olson 1.137 my($fig, $cgi, $user) = @_;
142 : overbeek 1.128
143 : olson 1.137 Trace("Connected to FIG.") if T(2);
144 :     my($map,@orgs,$user,$map,$org,$made_by,$from_func,$to_func);
145 :    
146 :     #for my $k (sort keys %ENV)
147 :     #{
148 :     # warn "$k=$ENV{$k}\n";
149 :     #}
150 :    
151 :     $ENV{"PATH"} = "$FIG_Config::bin:$FIG_Config::ext_bin:" . $ENV{"PATH"};
152 :    
153 :     if (0)
154 : overbeek 1.49 {
155 : olson 1.137 my $VAR1;
156 :     eval(join("",`cat /tmp/index_parms`));
157 :     $cgi = $VAR1;
158 :     # print STDERR &Dumper($cgi);
159 : overbeek 1.49 }
160 : olson 1.137
161 :     if (0)
162 : overbeek 1.49 {
163 : olson 1.137 print $cgi->header;
164 :     my @params = $cgi->param;
165 :     print "<pre>\n";
166 :     foreach $_ (@params)
167 :     {
168 :     print "$_\t:",join(",",$cgi->param($_)),":\n";
169 :     }
170 :    
171 :     if (0)
172 :     {
173 :     if (open(TMP,">/tmp/index_parms"))
174 :     {
175 :     print TMP &Dumper($cgi);
176 :     close(TMP);
177 :     }
178 :     }
179 :     exit;
180 : overbeek 1.49 }
181 : olson 1.137
182 :     my $html = [];
183 :    
184 :     my($pattern,$seq_pat,$tool,$ids,$subsearch);
185 :    
186 :     my $user = $cgi->param('user');
187 :    
188 :     if ($cgi->param('Search for Genes Matching an Occurrence Profile or Common to a Set of Organisms'))
189 : overbeek 1.51 {
190 : olson 1.137 Trace("Gene search chosen.") if T(2);
191 :     unshift @$html, "<TITLE>The SEED: Phylogenetic Signatures</TITLE>\n";
192 :     $ENV{"REQUEST_METHOD"} = "GET";
193 :     $ENV{"QUERY_STRING"} = "user=$user";
194 :     my @out = `./sigs.cgi`;
195 :     print @out;
196 :     &done;
197 :     }
198 :     elsif ($cgi->param('Search for Genes in Cluster, but Not Subsystems'))
199 :     {
200 :     $ENV{"REQUEST_METHOD"} = "GET";
201 :     $ENV{"QUERY_STRING"} = "user=$user";
202 :     my @out = `./clust_ss.cgi`;
203 :     print @out;
204 :     &done;
205 : overbeek 1.51 }
206 :    
207 : olson 1.137 #-----------------------------------------------------------------------
208 :     # Statistics for a single organism
209 :     #-----------------------------------------------------------------------
210 :     elsif ($cgi->param('statistics'))
211 :     {
212 :     Trace("Statistics chosen.") if T(2);
213 :     @orgs = $cgi->param('korgs');
214 :     @orgs = map { $_ =~ /(\d+\.\d+)/; $1 } @orgs;
215 :     if (@orgs != 1)
216 :     {
217 :     unshift @$html, "<TITLE>The SEED Statistics Page</TITLE>\n";
218 :     push(@$html,$cgi->h1('Please select a single organism to get statistcs'));
219 :     }
220 :     else
221 :     {
222 :     $ENV{"REQUEST_METHOD"} = "GET";
223 :     $ENV{"QUERY_STRING"} = "user=$user&genome=$orgs[0]";
224 :     my @out = `./genome_statistics.cgi`;
225 :     print @out;
226 :     &done;
227 :     }
228 : overbeek 1.51 }
229 : olson 1.137 #-----------------------------------------------------------------------
230 :     # Locate PEGs in Subsystems
231 :     #-----------------------------------------------------------------------
232 :     elsif ($cgi->param('Find PEGs') && ($subsearch = $cgi->param('subsearch')))
233 :     {
234 :     Trace("PEG find chosen.") if T(2);
235 :     my $genome = $cgi->param('genome');
236 :     my (@pegs,$peg);
237 :    
238 :     my @poss = $fig->by_alias($subsearch);
239 :     if (@poss > 0) { $subsearch = $poss[0] }
240 :    
241 :     if ($subsearch =~ /(fig\|\d+\.\d+\.peg\.\d+)/)
242 :     {
243 :     # handle searching for homologs that occur in subsystems
244 :     $peg = $1;
245 :     @pegs = ($peg);
246 :     push(@pegs,map { $_->id2 } $fig->sims( $peg, 500, 1.0e-10, "fig"));
247 :     if ($genome)
248 :     {
249 :     my $genomeQ = quotemeta $genome;
250 :     @pegs = grep { $_ =~ /^fig\|$genomeQ/ } @pegs;
251 :     }
252 :     }
253 :     else
254 :     {
255 :     # handle searching for PEGs with functional role in subsystems
256 :     @pegs = $fig->seqs_with_role($subsearch,"master",$genome);
257 :     }
258 :    
259 :     print $cgi->header;
260 :     if (@pegs == 0)
261 :     {
262 :     print $cgi->h1("Sorry, could not even find PEGs to check");
263 :     }
264 :     else
265 :     {
266 :     my(@pairs,$pair,@sub);
267 :     @pairs = map { $peg = $_;
268 :     @sub = $fig->peg_to_subsystems($peg);
269 :     map { [$peg,$_] } @sub } @pegs;
270 :     if (@pairs == 0)
271 :     {
272 :     print $cgi->h1("Sorry, could not map any PEGs to subsystems");
273 :     }
274 :     else
275 :     {
276 :     my($uni,$uni_func);
277 :     my $col_hdrs = ["PEG","Genome","Function","UniProt","UniProt Function","Subsystem"];
278 :     my $tab = [ map { $pair = $_; $uni = $fig->to_alias($pair->[0],"uni");
279 :     ($uni,$uni_func) = $uni ? (&HTML::uni_link($cgi,$uni),scalar $fig->function_of($uni)) : ("","");
280 :     [&HTML::fid_link($cgi,$pair->[0]),
281 :     $fig->org_of($pair->[0]),
282 :     scalar $fig->function_of($pair->[0]),
283 :     $uni,$uni_func,
284 :     &HTML::sub_link($cgi,$pair->[1])] } @pairs];
285 :     print &HTML::make_table($col_hdrs,$tab,"PEGs that Occur in Subsystems");
286 :     }
287 :     }
288 :     &done;
289 : overbeek 1.51 }
290 : olson 1.137 #-----------------------------------------------------------------------
291 :     # Align Sequences
292 :     #-----------------------------------------------------------------------
293 :     elsif ($cgi->param('Align Sequences'))
294 :     {
295 :     Trace("Sequence alignment chosen.");
296 :     my $seqs = $cgi->param('seqids');
297 :     $seqs =~ s/^\s+//;
298 :     $seqs =~ s/\s+$//;
299 :     my @seq_ids = split(/[ \t,;]+/,$seqs);
300 :     if (@seq_ids < 2)
301 :     {
302 :     print $cgi->header;
303 :     print $cgi->h1("Sorry, you need to specify at least two sequence IDs");
304 :     }
305 :     else
306 :     {
307 :     $ENV{"REQUEST_METHOD"} = "GET";
308 :     $_ = join('&checked=',@seq_ids);
309 :     $ENV{"QUERY_STRING"} = "user=$user&align=1&checked=" . $_;
310 :     my @out = `./fid_checked.cgi`;
311 :     print join("",@out);
312 :     }
313 :     &done;
314 : overbeek 1.31 }
315 : olson 1.137 #-----------------------------------------------------------------------
316 :     # Search (text) || Find Genes in Org that Might Play the Role
317 :     #-----------------------------------------------------------------------
318 :     elsif ( ( $pattern = $cgi->param('pattern') )
319 :     && ( $cgi->param('Search')
320 :     || $cgi->param('sphinx_search')
321 :     || $cgi->param('Search genome selected below')
322 :     || $cgi->param('Search Selected Organisms')
323 :     || $cgi->param('Find Genes in Org that Might Play the Role')
324 :     )
325 :     )
326 :     {
327 :     Trace("Pattern search chosen.") if T(2);
328 :     # Remove leading and trailing spaces from pattern -- GJO:
329 :     $pattern =~ s/^\s+//;
330 :     $pattern =~ s/\s+$//;
331 :     if ($cgi->param('Find Genes in Org that Might Play the Role') &&
332 :     (@orgs = $cgi->param('korgs')) && (@orgs == 1))
333 :     {
334 :     unshift @$html, "<TITLE>The SEED: Genes in that Might Play Specific Role</TITLE>\n";
335 :     @orgs = map { $_ =~ /(\d+\.\d+)/; $1 } @orgs;
336 :     $ENV{"REQUEST_METHOD"} = "GET";
337 :     $ENV{"QUERY_STRING"} = "user=$user&request=find_in_org&role=$pattern&org=$orgs[0]";
338 :     my @out = `./pom.cgi`;
339 :     print join("",@out);
340 :     &done;
341 :     }
342 :     else
343 :     {
344 :     unshift @$html, "<TITLE>The SEED: Search Results</TITLE>\n";
345 :     &show_indexed_objects($fig, $cgi, $html, $pattern, $user);
346 :     }
347 : efrank 1.1 }
348 : olson 1.137 #-----------------------------------------------------------------------
349 :     # Metabolic Overview
350 :     #-----------------------------------------------------------------------
351 :     elsif (($map = $cgi->param('kmap')) && $cgi->param('Metabolic Overview'))
352 : efrank 1.1 {
353 : olson 1.137 Trace("Metabolic overview chosen.") if T(2);
354 :     if ($map =~ /\(([^\)]*)\)$/)
355 :     {
356 :     $map = $1;
357 :     }
358 :     else
359 :     {
360 :     # ??? Gary ???
361 :     }
362 :    
363 :     #$map =~ s/^.*\((MAP\d+)\).*$/$1/;
364 :     @orgs = $cgi->param('korgs');
365 :     @orgs = map { $_ =~ /(\d+\.\d+)/; $1 } @orgs;
366 :     $ENV{"REQUEST_METHOD"} = "GET";
367 :     if (@orgs > 0)
368 :     {
369 :     $ENV{"QUERY_STRING"} = "user=$user&map=$map&org=$orgs[0]";
370 :     }
371 :     else
372 :     {
373 :     $ENV{"QUERY_STRING"} = "user=$user&map=$map";
374 :     }
375 :    
376 :     unshift @$html, "<TITLE>The SEED: Metabolic Overview</TITLE>\n";
377 :     my @out = `./show_map.cgi`;
378 :     &HTML::trim_output(\@out);
379 :     push( @$html, "<br>\n", @out );
380 : efrank 1.1 }
381 : olson 1.137
382 :     #-----------------------------------------------------------------------
383 :     # Search for Matches (sequence or pattern)
384 :     #-----------------------------------------------------------------------
385 :     elsif (($seq_pat = $cgi->param('seq_pat')) &&
386 :     ($tool = $cgi->param('Tool')) &&
387 :     $cgi->param('Search for Matches'))
388 :     {
389 :     Trace("Match search chosen.") if T(2);
390 :     @orgs = $cgi->param('korgs');
391 :     if (@orgs > 0)
392 :     {
393 :     @orgs = map { $_ =~ /(\d+\.\d+)/; $1 } @orgs;
394 :     }
395 :     else
396 :     {
397 :     @orgs = ("");
398 :     }
399 :    
400 :     if ($tool =~ /blast/)
401 :     {
402 :     unshift @$html, "<TITLE>The SEED: BLAST Search Results</TITLE>\n";
403 :     &run_blast($fig,$cgi,$html,$orgs[0],$tool,$seq_pat, $user);
404 :     }
405 : golsen 1.141 elsif ($tool =~ /Identical SEED proteins/)
406 :     {
407 :     unshift @$html, "<TITLE>The SEED: Identical SEED Proteins</TITLE>\n";
408 :     &identical_seed_proteins( $fig, $cgi, $html, $seq_pat, $user );
409 :     }
410 : olson 1.137 elsif ($tool =~ /Protein scan_for_matches/)
411 :     {
412 :     unshift @$html, "<TITLE>The SEED: Protein Pattern Match Results</TITLE>\n";
413 :     &run_prot_scan_for_matches($fig,$cgi,$html,$orgs[0],$seq_pat);
414 :     }
415 :     elsif ($tool =~ /DNA scan_for_matches/)
416 :     {
417 :     unshift @$html, "<TITLE>The SEED: Nucleotide Pattern Match Results</TITLE>\n";
418 :     &run_dna_scan_for_matches($fig,$cgi,$html,$orgs[0],$seq_pat);
419 :     }
420 : olson 1.38 }
421 : olson 1.137 elsif (($made_by = $cgi->param('made_by')) && $cgi->param('Extract Assignments'))
422 : olson 1.38 {
423 : olson 1.137 Trace("Assignment export chosen.") if T(2);
424 :     &export_assignments($fig,$cgi,$html,$made_by);
425 : olson 1.38 }
426 : olson 1.137 elsif ($cgi->param('Generate Assignments via Translation') &&
427 :     ($from_func = $cgi->param('from_func')) &&
428 :     ($to_func = $cgi->param('to_func')))
429 : efrank 1.1 {
430 : olson 1.137 Trace("Assignment translate chosen.") if T(2);
431 :     &translate_assignments($fig,$cgi,$html,$from_func,$to_func);
432 : efrank 1.1 }
433 : olson 1.137
434 :     elsif ($cgi->param('Extract Matched Sequences') && ($ids = $cgi->param('ids')))
435 : efrank 1.1 {
436 : olson 1.137 Trace("Matched sequence extract chosen.") if T(2);
437 :     my @ids = split(/,/,$ids);
438 :    
439 :     # Truncate the list if requested:
440 :    
441 :     my($list_to,$i);
442 :     if ($list_to = $cgi->param('list_to'))
443 :     {
444 :     for ($i=0; ($i < @ids) && ($ids[$i] ne $list_to); $i++) {}
445 :     if ($i < @ids)
446 :     {
447 :     $#ids = $i;
448 :     }
449 :     }
450 :    
451 :     # Print the sequences:
452 :     # Add organisms -- GJO
453 :    
454 :     my( $id, $seq, $desc, $func, $org );
455 :     push( @$html, $cgi->pre );
456 :     foreach $id (@ids)
457 :     {
458 :     if ($seq = $fig->get_translation($id))
459 :     {
460 :     $desc = $id;
461 :     if ( $func = $fig->function_of( $id ) )
462 :     {
463 :     $desc .= " $func";
464 :     }
465 :     if ( $org = $fig->genus_species( $fig->genome_of( $id ) ) )
466 :     {
467 :     $desc .= " [$org]" if $org;
468 :     }
469 :     push( @$html, ">$desc\n" );
470 :     for ($i=0; ($i < length($seq)); $i += 60)
471 :     {
472 :     # substr does not mind a request for more than length
473 :     push( @$html, substr( $seq, $i, 60 ) . "\n" );
474 :     }
475 :     }
476 :     }
477 :     push(@$html,$cgi->end_pre);
478 : overbeek 1.30 }
479 : olson 1.137
480 :     #-----------------------------------------------------------------------
481 :     # Initial search page
482 :     #-----------------------------------------------------------------------
483 : overbeek 1.30 else
484 :     {
485 : olson 1.137 Trace("SEED Entry page chosen.") if T(2);
486 :     unshift @$html, "<TITLE>The SEED: Entry Page</TITLE>\n";
487 :     &show_initial($fig,$cgi,$html);
488 :     }
489 :     Trace("Showing page.") if T(3);
490 :     &HTML::show_page($cgi,$html,1);
491 :     Trace("Page shown.") if T(3);
492 : overbeek 1.30 }
493 : overbeek 1.17
494 :     #==============================================================================
495 :     # Initial page (alias search)
496 :     #==============================================================================
497 :    
498 : efrank 1.1 sub show_initial {
499 :     my($fig,$cgi,$html) = @_;
500 :     my($map,$name,$olrg,$gs);
501 :    
502 : overbeek 1.83
503 :     #
504 :     # Display the message of the day, if present.
505 :     #
506 :    
507 :     show_motd($fig, $cgi, $html);
508 :    
509 : golsen 1.113 # The original $a and $b conflicted with explicit sort variables (ouch):
510 :     # "Can't use "my $a" in sort comparison" -- GJO
511 :    
512 :     my( $at, $bt, $et, $v, $envt ) = $fig->genome_counts;
513 : parrello 1.115 push(@$html,$cgi->h2("Contains $at archaeal, $bt bacterial, $et eukaryal, $v viral and $envt environmental genomes"));
514 : golsen 1.113 my( $ac, $bc, $ec ) = $fig->genome_counts("complete");
515 :     push(@$html,$cgi->h2("Of these, $ac archaeal, $bc bacterial and $ec eukaryal genomes are more-or-less complete"),$cgi->hr);
516 : efrank 1.1
517 :     push(@$html,
518 : parrello 1.75 $cgi->h2('Work on Subsystems'),
519 : overbeek 1.46
520 : parrello 1.75 # $cgi->start_form(-action => "ssa2.cgi"),
521 :     # "Enter user: ",
522 :     # $cgi->textfield(-name => "user", -size => 20),
523 :     # $cgi->submit('Work on Subsystems'),
524 :     # $cgi->end_form,
525 :    
526 :     # $cgi->h2('Work on Subsystems Using New, Experimental Code'),
527 : overbeek 1.111 # "This is the <i>new</i> subsystems code, and is now officially released.",
528 : parrello 1.75 $cgi->start_form(-action => "subsys.cgi"),
529 :     "Enter user: ",
530 :     $cgi->textfield(-name => "user", -size => 20),
531 :     $cgi->submit('Work on Subsystems'),
532 :     $cgi->end_form,
533 : golsen 1.133 );
534 :    
535 :     push(@$html,
536 :     "Or, on this machine you can: <a href='SubsysEditor.cgi'>Use the new Subsystem Editor</a>\n",
537 : olson 1.140 );
538 : golsen 1.133
539 :     push(@$html,
540 : parrello 1.75 $cgi->hr,
541 : golsen 1.133 $cgi->h2('Work on FIGfams'),
542 : overbeek 1.114 $cgi->start_form(-action => "ff.cgi"),
543 :     "Enter user: ",
544 :     $cgi->textfield(-name => "user", -size => 20),
545 :     $cgi->submit('Work on FIGfams'),
546 :     $cgi->end_form,
547 :     $cgi->hr,
548 : parrello 1.75 );
549 : olson 1.42
550 : golsen 1.95 push( @$html,
551 :     $cgi->start_form(-action => $this_script),
552 :     "<table>\n",
553 :     "<tr>",
554 : golsen 1.100 "<td colspan=2>", $cgi->h2('Searching for Genes or Functional Roles Using Text'), "</td>",
555 :     "<td align=right><a href='sdk_uniprot_search.cgi'>UniProt WebService Search</a></td>",
556 :     "</tr>\n",
557 :     "<tr>",
558 : golsen 1.95 "<td>Search Pattern: </td>",
559 :     "<td>", $cgi->textfield(-name => "pattern", -size => 65), "</td>",
560 :     "<td>", "Search <select name=search_kind>
561 :     <option value=DIRECT >Directly</option>
562 :     <option value=GO >Via Gene Ontology</option>
563 :     <option value=HUGO >Via HUGO Gene Nomenclature Committee</option>
564 :     </select></td>",
565 :     "</tr>\n",
566 :     "<tr>",
567 :     "<td>User ID:</td>",
568 :     "<td>",
569 :     $cgi->textfield(-name => "user", -size => 20), " [optional] &nbsp; &nbsp; ",
570 :     "Max Genes: ", $cgi->textfield(-name => "maxpeg", -size => 6, -value => 100), "&nbsp; &nbsp; ",
571 :     "Max Roles: ", $cgi->textfield(-name => "maxrole", -size => 6, -value => 100), "</td>",
572 : olson 1.135 "<td>", $cgi->checkbox(-name => "substring_match", -label => 'Allow substring match'),
573 :     $cgi->checkbox(-name => "suppress_aliases", -label => 'Suppress aliases'), "</td>",
574 : golsen 1.95 "</tr>\n",
575 :     "</table>\n",
576 : olson 1.137 ($FIG_Config::suppress_non_sphinx_search ? () : $cgi->submit('Search')),
577 :     ($have_sphinx ? $cgi->submit(-name => "sphinx_search", -value => 'Search with Sphinx') : ()),
578 : olson 1.136 $cgi->submit('Search genome selected below'),
579 : golsen 1.95 $cgi->reset('Clear'),
580 :     $cgi->hr
581 :     );
582 : olson 1.41
583 : golsen 1.113 #---------------------------------------------------------------------------
584 :     # Build the list of genomes from which the user can pick:
585 :     #---------------------------------------------------------------------------
586 :    
587 :     my $link;
588 :     ( $link = $cgi->url(-relative => 1) ) =~ s/index\.cgi/show_log.cgi/;
589 :    
590 :     push( @$html, $cgi->h2('If You Need to Pick a Genome for Options Below'),"&nbsp;[<a href=$link>Log</a>]");
591 :    
592 :     my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Plasmids', 'Viruses', 'Environmental samples' );
593 : golsen 1.47
594 :     # Canonical names must match the keywords used in the DBMS. They are
595 :     # defined in compute_genome_counts.pl
596 : golsen 1.113
597 : golsen 1.47 my %canonical = (
598 :     'All' => undef,
599 :     'Archaea' => 'Archaea',
600 :     'Bacteria' => 'Bacteria',
601 :     'Eucarya' => 'Eukaryota',
602 : golsen 1.113 'Plasmids' => 'Plasmid',
603 : golsen 1.47 'Viruses' => 'Virus',
604 :     'Environmental samples' => 'Environmental Sample'
605 :     );
606 :    
607 :     my $req_dom = $cgi->param( 'domain' ) || 'All';
608 :     my @domains = $cgi->radio_group( -name => 'domain',
609 :     -default => $req_dom,
610 :     -override => 1,
611 :     -values => [ @display ]
612 :     );
613 :    
614 :     my $n_domain = 0;
615 : golsen 1.113 my %dom_num = map { ( $_ => $n_domain++ ) } @display;
616 : golsen 1.47 my $req_dom_num = $dom_num{ $req_dom } || 0;
617 :    
618 : golsen 1.113 # Plasmids, Viruses and Environmental samples must have completeness
619 :     # = All (that is how they are in the database). Otherwise, default is
620 :     # Only "complete".
621 :    
622 : golsen 1.47 my $req_comp = ( $req_dom_num > $dom_num{ 'Eucarya' } ) ? 'All'
623 :     : $cgi->param( 'complete' ) || 'Only "complete"';
624 :     my @complete = $cgi->radio_group( -name => 'complete',
625 :     -default => $req_comp,
626 :     -override => 1,
627 :     -values => [ 'All', 'Only "complete"' ]
628 :     );
629 : golsen 1.113
630 : golsen 1.47 # Use $fig->genomes( complete, restricted, domain ) to get org list:
631 : golsen 1.113
632 : golsen 1.47 my $complete = ( $req_comp =~ /^all$/i ) ? undef : "complete";
633 : golsen 1.113
634 : overbeek 1.107 my @orgs;
635 : olson 1.123 my %org_labels;
636 : overbeek 1.107 foreach my $org ($fig->genomes( $complete, undef, $canonical{ $req_dom } ))
637 :     {
638 : olson 1.123 my $label = compute_genome_label($fig, $org);
639 :     $org_labels{$org} = $label;
640 :     push(@orgs, $org);
641 : overbeek 1.107 }
642 : golsen 1.113
643 :     # Make the sort case independent -- GJO
644 :    
645 :     # @orgs = sort { $a cmp $b } @orgs;
646 : olson 1.123 @orgs = sort { lc( $org_labels{$a} ) cmp lc( $org_labels{$b} ) } @orgs;
647 : overbeek 1.107
648 : olson 1.123 my $n_genomes = @orgs;
649 : overbeek 1.107
650 : olson 1.123 #
651 :     # Make a list of the org names for the code that doesn't use the
652 :     # name/value separation in the scrolling list.
653 :     #
654 : golsen 1.47
655 : olson 1.123 my @org_names = map { $org_labels{$_} } @orgs;
656 : golsen 1.47
657 : golsen 1.113 push( @$html, "<TABLE>\n",
658 :     " <TR VAlign=top>\n",
659 :     " <TD>",
660 : parrello 1.75 $cgi->scrolling_list( -name => 'korgs',
661 : golsen 1.47 -values => [ @orgs ],
662 : olson 1.123 -labels => \%org_labels,
663 : redwards 1.63 -size => 10,
664 : golsen 1.60 ), $cgi->br,
665 : golsen 1.47 "$n_genomes genomes shown ",
666 : golsen 1.60 $cgi->submit( 'Update List' ), $cgi->reset, $cgi->br,
667 : parrello 1.75 "Show some ", $cgi->submit('statistics')," of the selected genome",
668 : golsen 1.113 " </TD>",
669 :    
670 :     " <TD><b>Domain(s) to show:</b>\n",
671 :     " <TABLE>\n",
672 :     " <TR VAlign=bottom>\n",
673 :     " <TD>", join( "<br>", @domains[0..3]), "</TD>\n",
674 :     " <TD>&nbsp;&nbsp;&nbsp;</TD>\n",
675 :     " <TD>", join( "<br>", @domains[4..$#domains]), "</TD>\n",
676 :     " </TR>\n",
677 :     " </TABLE>\n",
678 :     " ", join( "<br>", "<b>Completeness?</b>", @complete), "\n",
679 :     " </TD>",
680 :     " </TR>\n",
681 : golsen 1.47 "</TABLE>\n",
682 : golsen 1.60 $cgi->hr
683 :     );
684 : overbeek 1.49
685 : overbeek 1.112
686 : golsen 1.47 push( @$html, $cgi->h2('Finding Candidates for a Functional Role'),
687 : parrello 1.75 "Make sure that you type the functional role you want to search for in the Search Pattern above",
688 :     $cgi->br,
689 :     $cgi->submit('Find Genes in Org that Might Play the Role'),
690 :     $cgi->hr);
691 : overbeek 1.17
692 : golsen 1.60 my @maps = sort map { $map = $_; $name = $fig->map_name($map); "$name ($map)" } $fig->all_maps;
693 :    
694 : golsen 1.47 push( @$html, $cgi->h2('Metabolic Overviews and Subsystem Maps (via KEGG & SEED) - Choose Map'),
695 : golsen 1.141 $cgi->submit('Metabolic Overview'),
696 :     $cgi->br,
697 :     $cgi->br,
698 :     $cgi->scrolling_list(-name => 'kmap',
699 :     -values => [@maps],
700 :     -size => 10
701 :     ),
702 :     $cgi->hr);
703 : overbeek 1.17
704 : golsen 1.141 push( @$html, $cgi->h2('Searching DNA or Protein Sequences (in a selected organism)') );
705 :     my $func_list = [ 'blastp',
706 :     'blastx',
707 :     'blastn',
708 :     'tblastn',
709 :     'blastp against complete genomes',
710 :     'Identical SEED proteins',
711 :     'Protein scan_for_matches',
712 :     'DNA scan_for_matches'
713 :     ];
714 :     push( @$html, "<TABLE>\n",
715 :     " <TR>\n",
716 :     " <TD>Sequence/Pattern: </TD>",
717 :     " <TD Colspan=3>", $cgi->textarea( -name => 'seq_pat',
718 :     -rows => 10,
719 :     -cols => 80
720 :     ), "</TD>\n",
721 :     " </TR>\n",
722 :     " <TR>\n",
723 :     " <TD>Search Program: </TD>",
724 :     " <TD>", $cgi->popup_menu( -name => 'Tool',
725 :     -values => $func_list,
726 :     -default => 'blastp'
727 :     ), " </TD>",
728 :     " <TD> Program Options:</TD>",
729 :     " <TD>", $cgi->textfield( -name => "blast_options", -size => 27 ), "</TD>",
730 :     " </TR>\n",
731 :     "</TABLE>\n",
732 :     $cgi->submit('Search for Matches'),
733 :     $cgi->hr);
734 : overbeek 1.17
735 : olson 1.41 #
736 :     # Make assignment export tbl.
737 :     #
738 :    
739 :     my @atbl;
740 : golsen 1.64 push(@atbl, [ "Extract assignments made by ",
741 : parrello 1.75 $cgi->textfield(-name => "made_by", -size => 25) . " (do not prefix with <b>master:</b>)" ]);
742 : golsen 1.64 push(@atbl, [ "Save as user: ",
743 : parrello 1.75 $cgi->textfield(-name => "save_user", -size => 25) . " (do not prefix with <b>master:</b>)" ] );
744 : golsen 1.64 push(@atbl, [ "After date (MM/DD/YYYY) ",
745 : parrello 1.75 $cgi->textfield(-name => "after_date", -size => 15)]);
746 : olson 1.41
747 :     push(@$html,
748 : overbeek 1.84 $cgi->h2($cgi->a({name => "exporting_assignments"}, 'Exporting Assignments')),
749 : parrello 1.75 &HTML::make_table(undef, \@atbl, '', border => 0),
750 :     $cgi->checkbox(-label => 'Tab-delimited Spreadsheet', -name => 'tabs', -value => 1),
751 :     $cgi->br,
752 :     $cgi->checkbox(-label => 'Save Assignments', -name => 'save_assignments', -value => 1),
753 :     $cgi->br,
754 :     $cgi->submit('Extract Assignments'),
755 :     $cgi->br, $cgi->br, $cgi->br,
756 :     "Alternatively, you can generate a set of assignments as translations of existing assignments. ",
757 :     "To do so, you need to make sure that you fill in the <b>Save as user</b> field just above. You ",
758 :     "should use something like <b>RossO</b> (leave out the <b>master:</b>). When you look at the assignments (and decide which ",
759 :     "to actually install), they will be made available under that name (but, when you access them, ",
760 :     "you will normally be using something like <b>master:RossO</b>)",
761 :     $cgi->br,$cgi->br,
762 :     "From: ",
763 :     $cgi->textarea(-name => 'from_func', -rows => 4, -cols => 100),
764 :     $cgi->br,$cgi->br,
765 :     "To:&nbsp;&nbsp;&nbsp;&nbsp; ",$cgi->textfield(-name => "to_func", -size => 100),
766 :     $cgi->br,
767 : golsen 1.103 "<TABLE Width=100%><TR><TD>",
768 :     $cgi->submit('Generate Assignments via Translation'),
769 :     "</TD><TD NoWrap Width=1%>",
770 : overbeek 1.76 $cgi->a({class=>"help", target=>"help", href=>"Html/seedtips.html#replace_names"}, "Help with generate assignments via translation"),
771 : golsen 1.103 "</TD></TR></TABLE>\n"
772 : golsen 1.95 );
773 :    
774 :     push(@$html,
775 : parrello 1.75 $cgi->hr,
776 :     $cgi->h2('Searching for Interesting Genes'),
777 :     $cgi->submit('Search for Genes Matching an Occurrence Profile or Common to a Set of Organisms'),
778 : overbeek 1.128 $cgi->submit('Search for Genes in Cluster, but Not Subsystems'),
779 : overbeek 1.17 $cgi->end_form
780 : parrello 1.75 );
781 : overbeek 1.14
782 :     push(@$html,
783 : parrello 1.75 $cgi->hr,
784 :     $cgi->h2('Process Saved Assignments Sets'),
785 :     $cgi->start_form(-action => "assignments.cgi"),
786 :     "Here you should include the <b>master:</b>. Thus use something like <b>master:RossO</b>",$cgi->br,
787 :     $cgi->br,
788 :     "Enter user: ",
789 :     $cgi->textfield(-name => "user", -size => 20),
790 :     $cgi->submit('Process Assignment Sets'),
791 : overbeek 1.17 $cgi->end_form
792 : parrello 1.75 );
793 : efrank 1.1
794 : overbeek 1.19 push(@$html,
795 : parrello 1.75 $cgi->hr,
796 : overbeek 1.112 $cgi->h2('Locate clustered genes not in subsystems'),
797 :     $cgi->start_form(-action => "find_ss_genes.cgi"),
798 :     $cgi->br,
799 :     "Enter user: ",
800 :     $cgi->textfield(-name => "user", -size => 20),
801 :     $cgi->submit('Find Clustered Genes'),
802 :     $cgi->end_form
803 :     );
804 :    
805 :     push(@$html,
806 :     $cgi->hr,
807 : parrello 1.75 $cgi->h2('Align Sequences'),
808 : olson 1.89 $cgi->start_form(-action => $this_script),
809 : parrello 1.75 "Enter user: ",
810 :     $cgi->textfield(-name => "user", -size => 20), $cgi->br,
811 :     $cgi->submit('Align Sequences'),": ",
812 :     $cgi->textfield(-name => "seqids", -size => 100),
813 : overbeek 1.31 $cgi->end_form
814 : parrello 1.75 );
815 : overbeek 1.51
816 :     push(@$html,
817 : parrello 1.75 $cgi->hr,
818 :     $cgi->h2('Locate PEGs in Subsystems'),
819 :     "If you wish to locate PEGs in subsystems, you have two approaches supported. You can
820 : overbeek 1.56 give a FIG id, and you will get a list of all homologs in the designated genome that occur in subsystems.
821 :     Alternatively, you can specify a functional role, and all PEGs in the genome that match that role will be shown.",
822 : olson 1.89 $cgi->start_form(-action => $this_script),
823 : parrello 1.75 "Enter user: ",
824 :     $cgi->textfield(-name => "user", -size => 20), $cgi->br,
825 :     $cgi->br,"Genome: ",$cgi->textfield(-name => "genome", -size => 15),$cgi->br,
826 :     "Search: ",$cgi->textfield(-name => "subsearch", -size => 100),$cgi->br,
827 :     $cgi->submit('Find PEGs'),": ",
828 : overbeek 1.51 $cgi->end_form
829 : parrello 1.75 );
830 : overbeek 1.116 push(@$html,
831 :     $cgi->hr,
832 :     $cgi->h2('Compare Metabolic Reconstructions'),
833 :     "If you wish to compare the reconstructions for two distinct genomes, use this tool.
834 :     You should specify two genomes, or a P1K server output directory (as genome1) and a second genome (which
835 :     must be a valid genome ID that exists in this SEED). You can ask for functional roles/subsystems that the
836 :     genomes have in common, those that exist in genome1 only, or those that exist in only genome2.",
837 :     $cgi->start_form(-action => 'comp_MR.cgi'),
838 :     "Enter user: ",
839 :     $cgi->textfield(-name => "user", -size => 20), $cgi->br,
840 :     $cgi->br,"Genome1: ",$cgi->textfield(-name => "genome1", -size => 40),$cgi->br,
841 :     $cgi->br,"Genome2: ",$cgi->textfield(-name => "genome2", -size => 15),
842 :     $cgi->scrolling_list( -name => 'request',
843 :     -values => [ 'common', 'in1_not2','in2_not1' ],
844 :     -size => 3,
845 :     ), $cgi->br,
846 :     $cgi->submit('Compare Reconstructions'),": ",
847 :     $cgi->end_form
848 :     );
849 : overbeek 1.117
850 :     push(@$html,
851 :     $cgi->hr,
852 :     $cgi->h2('Compare Genomes'),
853 :     "If you wish to compare the contents of several genomes, you can use this tool.
854 :     Choose a set of genomes (at least two).<br><br> ",
855 :     $cgi->start_form(-action => 'comp_genomes.cgi'),
856 :     "Enter user: ",
857 :     $cgi->textfield(-name => "user", -size => 20), $cgi->br,
858 :     $cgi->scrolling_list( -name => 'comp_orgs',
859 : overbeek 1.129 -values => [ @orgs ],
860 :     -labels => \%org_labels,
861 : overbeek 1.117 -size => 10,
862 :     -multiple => 1,
863 :     ), $cgi->br,$cgi->br,
864 : overbeek 1.118 "<br><br>",
865 : overbeek 1.117 "Optionally, you can select a PEG and window size to limit the comparison:<br>",
866 :     "PEG: ", $cgi->textfield(-name => "peg", -size => 20), $cgi->br,
867 :     "Window Size: ", $cgi->textfield(-name => "sz", -size => 8, -value => 20000), $cgi->br,
868 :    
869 : overbeek 1.119 $cgi->submit('Compare Genomes'),
870 :     $cgi->submit('Update Functions in MouseOvers'),"<br>",
871 : overbeek 1.117 $cgi->end_form
872 :     );
873 : overbeek 1.120
874 :     push(@$html,
875 :     $cgi->hr,
876 :     $cgi->h2('New Pattern Matching'),
877 :     "The new pattern location tool.<br><br> ",
878 :     $cgi->start_form(-action => 'locate_patterns.cgi'),
879 :     "Enter user (optional): ",
880 :     $cgi->textfield(-name => "user", -size => 20), $cgi->br,
881 :     $cgi->scrolling_list( -name => 'comp_orgs',
882 : olson 1.123 -values => [ @org_names ],
883 : overbeek 1.120 -size => 10,
884 :     -multiple => 1,
885 :     ), $cgi->br,$cgi->br,
886 :     "<br><br>",
887 :     "Pattern: ", $cgi->textfield(-name => "pattern", -size => 60), $cgi->br,
888 :     $cgi->popup_menu(-name => 'Tool',
889 :     -values => ['Protein scan_for_matches', 'DNA scan_for_matches'],
890 :     -default => 'Protein scan_for_matches'),
891 :     $cgi->submit('Scan For Matches'),
892 :     $cgi->end_form
893 :     );
894 : efrank 1.1 }
895 :    
896 : olson 1.123 sub compute_genome_label
897 :     {
898 :     my($fig, $org) = @_;
899 :    
900 : olson 1.137 my $label;
901 : olson 1.123 my $gs = $fig->genus_species($org);
902 :     if ($fig->genome_domain($org) ne "Environmental Sample")
903 :     {
904 :     my $gc=$fig->number_of_contigs($org);
905 :     $label = "$gs ($org) [$gc contigs]";
906 :     }
907 :     else
908 :     {
909 :     $label = "$gs ($org)";
910 :     }
911 :     return $label;
912 :     }
913 :    
914 : overbeek 1.83 #
915 :     # Show a message of the day if it's present.
916 :     #
917 :     sub show_motd
918 :     {
919 :     my($fig, $cgi, $html) = @_;
920 :    
921 :     my $motd_file = "$FIG_Config::fig_disk/config/motd";
922 :    
923 :     if (open(F, "<$motd_file"))
924 :     {
925 :     push(@$html, "<p>\n");
926 :     while (<F>)
927 :     {
928 :     push(@$html, $_);
929 :     }
930 :     close(F);
931 :     push(@$html, "<hr>\n");
932 :     }
933 :     }
934 : overbeek 1.17
935 :     #==============================================================================
936 :     # Indexed objects (text search)
937 :     #==============================================================================
938 :    
939 : efrank 1.1 sub show_indexed_objects {
940 : olson 1.137 my($fig, $cgi, $html, $pattern, $user) = @_;
941 : golsen 1.22 my($msg, $i);
942 : efrank 1.1
943 :     if ($pattern =~ /^\s*(fig\|\d+\.\d+\.peg\.\d+)\s*$/)
944 :     {
945 : parrello 1.75 my $peg = $1;
946 :     my $user = $cgi->param('user');
947 :     $user = $user ? $user : "";
948 : paczian 1.131 # my @prot_out;
949 :     # if (defined($cgi->param('fromframe'))) {
950 :     # $ENV{'REQUEST_METHOD'} = "GET";
951 :     # $ENV{"QUERY_STRING"} = "prot=$peg\&user=$user\&action=proteinpage";
952 :     # $ENV{"REQUEST_URI"} =~ s/$this_script/frame.cgi/;
953 :     # $ENV{"SCRIPT_NAME"} =~ s/$this_script/frame.cgi/;
954 :     # @prot_out = TICK("./frame.cgi");
955 :     # } else {
956 :     # $ENV{'REQUEST_METHOD'} = "GET";
957 :     # $ENV{"QUERY_STRING"} = "prot=$peg\&user=$user";
958 :     # $ENV{"REQUEST_URI"} =~ s/$this_script/protein.cgi/;
959 :     # $ENV{"SCRIPT_NAME"} =~ s/$this_script/protein.cgi/;
960 :     # @prot_out = TICK("./protein.cgi");
961 :     # }
962 :     # print @prot_out;
963 :     if ($FIG_Config::anno3_mode) {
964 :     print $cgi->redirect("seedviewer.cgi?page=Annotation&feature=$peg&user=$user");
965 : paczian 1.102 } else {
966 : paczian 1.131 print $cgi->redirect("protein.cgi?prot=$peg&user=$user");
967 : paczian 1.102 }
968 : olson 1.137 &done;
969 : efrank 1.1 }
970 : overbeek 1.71 $pattern =~ s/([a-zA-Z0-9])\|([a-zA-Z0-9])/$1\\\|$2/ig;
971 : efrank 1.92
972 :     my $search_kind = $cgi->param("search_kind");
973 :     if ( $search_kind && ! ($search_kind eq "DIRECT") ) {
974 :     #otherwise $search_kind is name of controlled vocab
975 :     find_pegs_by_cv($fig, $cgi, $html, $user, $pattern, $search_kind);
976 :     return;
977 :     }
978 :    
979 : overbeek 1.17 push( @$html, $cgi->br );
980 : olson 1.134
981 :     my( $peg_index_data, $role_index_data );
982 :    
983 : olson 1.136 if ($cgi->param('sphinx_search') && @FIG_Config::sphinx_params)
984 : olson 1.134 {
985 :     my $sphinx = Sphinx::Search->new();
986 : olson 1.136 $sphinx->SetServer(@FIG_Config::sphinx_params);
987 : olson 1.135
988 :     my $offset = $cgi->param('sphinx_offset');
989 :     if ($offset =~ /(\d+)/)
990 :     {
991 :     $offset = $1;
992 :     }
993 :     else
994 :     {
995 :     $offset = 0;
996 :     }
997 :     $sphinx->SetLimits($offset, $cgi->param("maxpeg"));
998 : olson 1.139 $sphinx->SetMatchMode($sphinx_search_mode);
999 : olson 1.136 print STDERR "pattern=$pattern\n";
1000 : olson 1.140 my $res = $sphinx->Query($pattern, 'feature_all_index');
1001 : olson 1.134
1002 : olson 1.135 $offset += $cgi->param('maxpeg');
1003 :     $cgi->param(sphinx_offset => $offset);
1004 :     push(@$html,
1005 :     $cgi->start_form(-method => 'post',
1006 :     -action => 'index.cgi'),
1007 :     $cgi->hidden(-name => 'sphinx_offset', -value => $offset),
1008 : olson 1.136 $cgi->hidden(-name => 'suppress_aliases', -value => $cgi->param('suppress_aliases')),
1009 : olson 1.135 $cgi->hidden(-name => 'maxpeg', -value => $cgi->param('maxpeg')),
1010 :     $cgi->hidden(-name => 'pattern', -value => $cgi->param('pattern')),
1011 :     $cgi->submit(-name => 'sphinx_search', -value => "More hits"),
1012 :     $cgi->end_form());
1013 :    
1014 :    
1015 : olson 1.134 $peg_index_data = [];
1016 :     $role_index_data = [];
1017 :    
1018 :     my @fids;
1019 : olson 1.140
1020 : olson 1.134 for my $row (@{$res->{matches}})
1021 :     {
1022 :     my $doc = $row->{doc};
1023 : olson 1.140 my $fid = SeedSearch::docid_to_fid($doc);
1024 :     print STDERR Dumper($doc, $fid);
1025 : olson 1.134 next unless $fig->is_real_feature($fid);
1026 :     push(@fids, $fid);
1027 :     }
1028 :    
1029 :     my $fns = $fig->function_of_bulk(\@fids);
1030 : olson 1.135 my $aliases = {};
1031 :     if (!$cgi->param('suppress_aliases'))
1032 :     {
1033 :     $aliases = $fig->feature_aliases_bulk(\@fids);
1034 :     }
1035 : olson 1.134
1036 :     for my $fid (@fids)
1037 :     {
1038 :     my $fn = $fns->{$fid};
1039 : golsen 1.141 my $aliases = $aliases->{$fid} ? join(" ", @{$aliases->{$fid}}) : "";
1040 : olson 1.134 my $gs = $fig->genus_species(&FIG::genome_of($fid));
1041 :    
1042 :     push(@$peg_index_data, [$fid, $gs, $aliases, $fn]);
1043 :     }
1044 :     }
1045 :     else
1046 :     {
1047 :     ( $peg_index_data, $role_index_data ) = $fig->search_index($pattern, $cgi->param("substring_match") eq "on");
1048 :     }
1049 :    
1050 : overbeek 1.17 my $maxpeg = defined( $cgi->param("maxpeg") ) ? $cgi->param("maxpeg") : 100;
1051 :     my $maxrole = defined( $cgi->param("maxrole") ) ? $cgi->param("maxrole") : 100;
1052 : mkubal 1.109 my $output_file = "$FIG_Config::temp/search_results.txt";
1053 : parrello 1.115 Trace("Producing search output file $output_file") if T(3);
1054 : mkubal 1.109 open(OUT,">$output_file");
1055 : overbeek 1.17
1056 : redwards 1.53 # RAE added lines to allow searching within a single organism
1057 : golsen 1.59 # if ($cgi->param('korgs'))
1058 :     # {
1059 :     # $cgi->param('korgs') =~ /\((\d+\.*\d*)\)/;
1060 :     # $org=$1; # this should be undef if korgs is not defined
1061 :    
1062 :     # push (@$html, $cgi->br, "Matches found in ",$cgi->param('korgs'), $cgi->p);
1063 :     # my @clean_data; my @clean_index;
1064 :     # while (@$peg_index_data)
1065 :     # {
1066 :     # my ($data, $index)=(shift @$peg_index_data, shift @$role_index_data);
1067 :     # next unless (${$data}[0] =~ /^fig\|$org\.peg/);
1068 :     # push @clean_data, $data;
1069 :     # push @clean_index, $index;
1070 :     # }
1071 :    
1072 :     # @$peg_index_data=@clean_data;
1073 :     # @$role_index_data=@clean_index;
1074 :     # }
1075 :     ## End of added lines
1076 : redwards 1.53
1077 : mkubal 1.99 # RAE version with separate submit buttoxns and more than one org in korg
1078 : redwards 1.63 # this is used by organisms.cgi for group specific searches
1079 :     if ( $cgi->param('korgs') && $cgi->param('Search Selected Organisms')
1080 :     )
1081 :     {
1082 :     my @temp;
1083 :     foreach my $org ($cgi->param('korgs'))
1084 :     {
1085 :     push @temp, grep { $_->[0] =~ /^fig\|$org/ } @$peg_index_data;
1086 :     }
1087 :     @$peg_index_data = @temp;
1088 :     }
1089 :    
1090 : golsen 1.59 # GJO version with separate submit buttons
1091 : redwards 1.53
1092 : olson 1.123 if ( $cgi->param('korgs') && $cgi->param('korgs') =~ /(\d+\.\d+)/
1093 : golsen 1.59 && $cgi->param('Search genome selected below')
1094 :     )
1095 :     {
1096 : parrello 1.75 my $org = $1;
1097 : olson 1.123 my $label = compute_genome_label($fig, $org);
1098 :     push @$html, $cgi->br, "Matches found in $label", $cgi->p;
1099 : mkubal 1.79 @$peg_index_data = grep { $_->[0] =~ /^fig\|$org\.*/ } @$peg_index_data;
1100 : redwards 1.53 }
1101 : parrello 1.115 Trace("Initial push.") if T(3);
1102 : golsen 1.59 if ( ( $maxpeg > 0 ) && @$peg_index_data )
1103 : overbeek 1.17 {
1104 : parrello 1.75 # RAE: Added javascript buttons see below. Only two things are needed.
1105 :     # The form must have a name parameter, and the one line of code for the
1106 :     # buttons. Everything else is automatic
1107 :    
1108 :     push( @$html, $cgi->start_form( -method => 'post',
1109 :     -target => "window$$",
1110 :     -action => 'fid_checked.cgi',
1111 :     -name => 'found_pegs'
1112 :     ),
1113 :     $cgi->hidden(-name => 'user', -value => $user),
1114 :     "For Selected (checked) sequences: ",
1115 :     $cgi->submit('get sequences'),
1116 :     $cgi->submit('view annotations'),
1117 :     $cgi->submit('assign/annotate'),
1118 :     $cgi->param('SPROUT') ? () : $cgi->submit('view similarities'),
1119 :     $cgi->br, $cgi->br
1120 :     );
1121 : efrank 1.1
1122 : redwards 1.63 # RAE Add the check all/uncheck all boxes.
1123 :     push (@$html, $cgi->br, &HTML::java_buttons("found_pegs", "checked"), $cgi->br);
1124 : parrello 1.75
1125 :     my $n = @$peg_index_data;
1126 :     if ($n > $maxpeg)
1127 :     {
1128 :     $msg = "Showing first $maxpeg out of $n protein genes";
1129 :     $#{$peg_index_data} = $maxpeg-1;
1130 :     }
1131 :     else
1132 :     {
1133 : mkubal 1.79 $msg = "Showing $n FEATURES";
1134 : parrello 1.75 }
1135 :    
1136 : parrello 1.115 my $col_hdrs = ["Sel","FEATURE","Organism","Aliases","Functions","Who","Attributes"];
1137 : overbeek 1.106 my $tab = [ map { format_peg_entry( $fig, $cgi, $_ ) } sort {$a->[1] cmp $b->[1]} @$peg_index_data ];
1138 : mkubal 1.109
1139 :     my $tab2 = [ sort {$a->[1] cmp $b->[1]} @$peg_index_data ];
1140 : parrello 1.115 Trace("Final html push.") if T(3);
1141 : mkubal 1.109 push( @$html,$cgi->br,
1142 :     "<a href=$FIG_Config::temp_url/search_results.txt>Download_Search_Results</a>",
1143 :     &HTML::make_table($col_hdrs,$tab,$msg),
1144 : parrello 1.75 $cgi->br,
1145 :     "For SELECTed (checked) sequences: ",
1146 :     $cgi->submit('get sequences'),
1147 :     $cgi->submit('view annotations'),
1148 :     $cgi->submit('assign/annotate'),
1149 :     $cgi->param('SPROUT') ? () : $cgi->submit('view similarities'),
1150 :     $cgi->br,
1151 :     $cgi->end_form
1152 : mkubal 1.109 );
1153 :    
1154 :     foreach my $t (@$tab2){
1155 :     my $string = join("\t",@$t);
1156 :     print OUT "$string\n";
1157 :     }
1158 :    
1159 : efrank 1.1 }
1160 : golsen 1.59 elsif ( $maxpeg > 0 )
1161 :     {
1162 : parrello 1.75 push @$html, $cgi->h3('No matching protein genes');
1163 : golsen 1.59 }
1164 : overbeek 1.17
1165 : golsen 1.59 if ( ( $maxrole > 0 ) && @$role_index_data )
1166 : efrank 1.1 {
1167 : parrello 1.75 my $n = @$role_index_data;
1168 :     if ($n > $maxrole)
1169 :     {
1170 :     $msg = "Showing first $maxrole out of $n Roles";
1171 :     $#{$role_index_data} = $maxrole - 1;
1172 :     }
1173 :     else
1174 :     {
1175 :     $msg = "Showing $n Roles";
1176 :     }
1177 :    
1178 :     if ( $maxpeg > 0 ) { push( @$html, $cgi->hr ) }
1179 :     my $col_hdrs = ["Role"];
1180 :     my $tab = [ map { &format_role_entry($fig,$cgi,$_) } @$role_index_data ];
1181 :     push( @$html, &HTML::make_table($col_hdrs,$tab,$msg) );
1182 : efrank 1.1 }
1183 : golsen 1.59 elsif ( $maxrole > 0 )
1184 :     {
1185 : parrello 1.75 push @$html, $cgi->h3('No matching roles');
1186 : golsen 1.59 }
1187 : parrello 1.115 Trace("Show-indexed-objects method complete.") if T(3);
1188 : efrank 1.1 }
1189 :    
1190 :     sub format_peg_entry {
1191 : golsen 1.67 my( $fig, $cgi, $entry ) = @_;
1192 : efrank 1.1
1193 : parrello 1.115 my($peg,$gs,$aliases,$function,$who,$attribute) = @$entry;
1194 : overbeek 1.17
1195 : golsen 1.21 $gs =~ s/\s+\d+$//; # Org name comes with taxon_id appended (why?) -- GJO
1196 : efrank 1.1
1197 : golsen 1.67 my $box = "<input type=checkbox name=checked value=\"$peg\">";
1198 : overbeek 1.132 my $peg_link;
1199 :     if ($FIG_Config::anno3_mode) {
1200 :     my $user = $cgi->param('user');
1201 :     $peg_link = "<a href=seedviewer.cgi?page=Annotation&feature=$peg&user=$user>$peg</a>";
1202 :     } else {
1203 :     $peg_link = &HTML::fid_link($cgi,$peg);
1204 :     }
1205 :     return [ $box, $peg_link, $gs, $aliases, $function, $who ];
1206 : efrank 1.1 }
1207 :    
1208 :     sub format_role_entry {
1209 :     my($fig,$cgi,$entry) = @_;
1210 :    
1211 :     return [&HTML::role_link($cgi,$entry)];
1212 :     }
1213 :    
1214 :     sub run_prot_scan_for_matches {
1215 :     my($fig,$cgi,$html,$org,$pat) = @_;
1216 :     my($string,$peg,$beg,$end,$user,$col_hdrs,$tab,$i);
1217 :    
1218 :     my $tmp_pat = "$FIG_Config::temp/tmp$$.pat";
1219 :     open(PAT,">$tmp_pat")
1220 : parrello 1.75 || die "could not open $tmp_pat";
1221 : efrank 1.1 $pat =~ s/[\s\012\015]+/ /g;
1222 :     print PAT "$pat\n";
1223 :     close(PAT);
1224 :     my @out = `$FIG_Config::ext_bin/scan_for_matches -p $tmp_pat < $FIG_Config::organisms/$org/Features/peg/fasta`;
1225 :     if (@out < 1)
1226 :     {
1227 : parrello 1.75 push(@$html,$cgi->h1("Sorry, no hits"));
1228 : efrank 1.1 }
1229 :     else
1230 :     {
1231 : parrello 1.75 if (@out > 2000)
1232 :     {
1233 :     push(@$html,$cgi->h1("truncating to the first 1000 hits"));
1234 :     $#out = 1999;
1235 :     }
1236 :    
1237 :     push(@$html,$cgi->pre);
1238 :     $user = $cgi->param('user');
1239 :     $col_hdrs = ["peg","begin","end","string","function of peg"];
1240 :     for ($i=0; ($i < @out); $i += 2)
1241 :     {
1242 :     if ($out[$i] =~ /^>([^:]+):\[(\d+),(\d+)\]/)
1243 :     {
1244 :     $peg = $1;
1245 :     $beg = $2;
1246 :     $end = $3;
1247 :     $string = $out[$i+1];
1248 :     chomp $string;
1249 :     push( @$tab, [ &HTML::fid_link($cgi,$peg,1),
1250 :     $beg,
1251 :     $end,
1252 :     $string,
1253 :     scalar $fig->function_of( $peg, $user )
1254 :     ]
1255 :     );
1256 :     }
1257 :     }
1258 :     push(@$html,&HTML::make_table($col_hdrs,$tab,"Matches"));
1259 :     push(@$html,$cgi->end_pre);
1260 : efrank 1.1 }
1261 :     unlink($tmp_pat);
1262 :     }
1263 :    
1264 : overbeek 1.17 #==============================================================================
1265 :     # Scan for matches
1266 :     #==============================================================================
1267 :    
1268 : efrank 1.1 sub run_dna_scan_for_matches {
1269 :     my($fig,$cgi,$html,$org,$pat) = @_;
1270 :     my($string,$contig,$beg,$end,$col_hdrs,$tab,$i);
1271 :    
1272 :     my $tmp_pat = "$FIG_Config::temp/tmp$$.pat";
1273 :     open(PAT,">$tmp_pat")
1274 : parrello 1.75 || die "could not open $tmp_pat";
1275 : efrank 1.1 $pat =~ s/[\s\012\015]+/ /g;
1276 :     print PAT "$pat\n";
1277 :     close(PAT);
1278 :     my @out = `cat $FIG_Config::organisms/$org/contigs | $FIG_Config::ext_bin/scan_for_matches -c $tmp_pat`;
1279 :     if (@out < 1)
1280 :     {
1281 : parrello 1.75 push(@$html,$cgi->h1("Sorry, no hits"));
1282 : efrank 1.1 }
1283 :     else
1284 :     {
1285 : parrello 1.75 if (@out > 2000)
1286 :     {
1287 :     push(@$html,$cgi->h1("truncating to the first 1000 hits"));
1288 :     $#out = 1999;
1289 :     }
1290 :    
1291 :     push(@$html,$cgi->pre);
1292 :     $col_hdrs = ["contig","begin","end","string"];
1293 :     for ($i=0; ($i < @out); $i += 2)
1294 :     {
1295 :     if ($out[$i] =~ /^>([^:]+):\[(\d+),(\d+)\]/)
1296 :     {
1297 :     $contig = $1;
1298 :     $beg = $2;
1299 :     $end = $3;
1300 :     $string = $out[$i+1];
1301 :     chomp $string;
1302 :     push(@$tab,[$contig,$beg,$end,$string]);
1303 :     }
1304 :     }
1305 :     push(@$html,&HTML::make_table($col_hdrs,$tab,"Matches"));
1306 :     push(@$html,$cgi->end_pre);
1307 : efrank 1.1 }
1308 :     unlink($tmp_pat);
1309 :     }
1310 :    
1311 : overbeek 1.17 #==============================================================================
1312 :     # BLAST search
1313 :     #==============================================================================
1314 :    
1315 : efrank 1.1 sub run_blast {
1316 : olson 1.137 my( $fig, $cgi, $html, $org, $tool, $seq, $user ) = @_;
1317 : golsen 1.45 my( $query, @out );
1318 : efrank 1.1
1319 : golsen 1.45 my $tmp_seq = "$FIG_Config::temp/run_blast_tmp$$.seq";
1320 : efrank 1.1
1321 : overbeek 1.17 #--------------------------------------------------------------------------
1322 : golsen 1.97 # Does the request require a defined genome? We never check that the
1323 :     # database build works, so the least we can do is some up-front tests.
1324 :     # -- GJO
1325 :     #--------------------------------------------------------------------------
1326 :    
1327 :     if ( $tool !~ /complete genomes/ )
1328 :     {
1329 :     if ( ! $org || ! -d "$FIG_Config::organisms/$org" )
1330 :     {
1331 :     push @$html, $cgi->h2("Sorry, $tool requires selecting a genome." );
1332 :     return;
1333 :     }
1334 :    
1335 :     if ( ( $tool =~ /blastn/ ) || ( $tool =~ /tblastx/ ) )
1336 :     {
1337 :     if ( ! -f "$FIG_Config::organisms/$org/contigs" )
1338 :     {
1339 :     push @$html, $cgi->h2("Sorry, cannot find DNA data for genome $org." );
1340 :     return;
1341 :     }
1342 :     }
1343 :     else
1344 :     {
1345 :     if ( ! -f "$FIG_Config::organisms/$org/Features/peg/fasta" )
1346 :     {
1347 :     push @$html, $cgi->h2("Sorry, cannot find protein data for genome $org." );
1348 :     return;
1349 :     }
1350 :     }
1351 :     }
1352 :    
1353 :     #--------------------------------------------------------------------------
1354 : overbeek 1.17 # Is the request for an id? Get the sequence
1355 :     #--------------------------------------------------------------------------
1356 : golsen 1.97
1357 : golsen 1.91 if ( ( $query ) = $seq =~ /^\s*([a-zA-Z]{2,4}\|\S+)/ )
1358 : efrank 1.1 {
1359 : parrello 1.75 # Replaced $id with $query so that output inherits label -- GJO
1360 : golsen 1.91 # Found ugly fairure to build correct query sequence for
1361 :     # 'blastp against complete genomes'. Can't figure out
1362 : golsen 1.141 # why it ever worked with an id -- GJO
1363 : golsen 1.91
1364 : parrello 1.75 $seq = "";
1365 : golsen 1.91 if ( ($tool eq "blastp") || ($tool eq "tblastn")
1366 :     || ($tool eq 'blastp against complete genomes')
1367 :     )
1368 : parrello 1.75 {
1369 :     $seq = $fig->get_translation($query);
1370 : golsen 1.97 my $func = $fig->function_of( $query, $user );
1371 :     $query .= " $func" if $func;
1372 : parrello 1.75 }
1373 :     elsif ($query =~ /^fig/)
1374 :     {
1375 :     my @locs;
1376 :     if ((@locs = $fig->feature_location($query)) && (@locs > 0))
1377 :     {
1378 :     $seq = $fig->dna_seq($fig->genome_of($query),@locs);
1379 :     }
1380 :     }
1381 :     if (! $seq)
1382 :     {
1383 :     push(@$html,$cgi->h1("Sorry, could not get sequence for $query"));
1384 :     return;
1385 :     }
1386 : efrank 1.1 }
1387 : golsen 1.45
1388 : overbeek 1.17 #--------------------------------------------------------------------------
1389 :     # Is it a fasta format? Get the query name
1390 :     #--------------------------------------------------------------------------
1391 : golsen 1.45
1392 : golsen 1.141 elsif ( $seq =~ s/^\s*>\s*(\S+[^\n\012\015]*)// ) # more flexible match -- GJO
1393 : efrank 1.1 {
1394 : parrello 1.75 $query = $1;
1395 : efrank 1.1 }
1396 : golsen 1.45
1397 : overbeek 1.17 #--------------------------------------------------------------------------
1398 :     # Take it as plain text
1399 :     #--------------------------------------------------------------------------
1400 : golsen 1.45
1401 : efrank 1.1 else
1402 :     {
1403 : parrello 1.75 $query = "query";
1404 : efrank 1.1 }
1405 : golsen 1.45
1406 :     #
1407 :     # The rest is taken as the sequence
1408 :     #
1409 :    
1410 : golsen 1.23 $seq =~ s/\s+//g;
1411 : golsen 1.45 open( SEQ, ">$tmp_seq" ) || die "run_blast could not open $tmp_seq";
1412 : efrank 1.1 print SEQ ">$query\n$seq\n";
1413 : golsen 1.45 close( SEQ );
1414 : efrank 1.1
1415 :     if (! $ENV{"BLASTMAT"}) { $ENV{"BLASTMAT"} = "$FIG_Config::blastmat" }
1416 : golsen 1.88 my $blast_opt = $cgi->param( 'blast_options' ) || '';
1417 : efrank 1.1
1418 : golsen 1.45 if ( $tool eq "blastp" )
1419 : efrank 1.1 {
1420 : parrello 1.75 my $db = "$FIG_Config::organisms/$org/Features/peg/fasta";
1421 :     &verify_db( $db, "p" );
1422 : golsen 1.126 @out = map { &HTML::set_prot_links($cgi,$_) }
1423 :     remove_deleted_fids( $fig, execute_blastall( 'blastp', $tmp_seq, $db, $blast_opt ) );
1424 : efrank 1.1 }
1425 : golsen 1.45
1426 :     elsif ( $tool eq "blastx" )
1427 : efrank 1.1 {
1428 : parrello 1.75 my $db = "$FIG_Config::organisms/$org/Features/peg/fasta";
1429 :     &verify_db( $db, "p" );
1430 : golsen 1.126 @out = map { &HTML::set_prot_links($cgi,$_) }
1431 :     remove_deleted_fids( $fig, execute_blastall( 'blastx', $tmp_seq, $db, $blast_opt ) );
1432 : efrank 1.1 }
1433 : golsen 1.45
1434 :     elsif ( $tool eq "blastn" )
1435 : efrank 1.1 {
1436 : parrello 1.75 my $db = "$FIG_Config::organisms/$org/contigs";
1437 :     &verify_db( $db, "n" ); ### fix to get all contigs
1438 : golsen 1.88 @out = execute_blastall( 'blastn', $tmp_seq, $db, "-r 1 -q -1 " . $blast_opt );
1439 : parrello 1.75 push @$html, blast_graphics( $fig, $cgi, $org, \@out, $tool );
1440 : efrank 1.1 }
1441 : golsen 1.45
1442 :     elsif ( $tool eq "tblastn" )
1443 : efrank 1.1 {
1444 : parrello 1.75 my $db = "$FIG_Config::organisms/$org/contigs";
1445 :     &verify_db( $db, "n" ); ### fix to get all contigs
1446 : golsen 1.88 @out = execute_blastall( 'tblastn', $tmp_seq, $db, $blast_opt );
1447 : parrello 1.75 push @$html, blast_graphics( $fig, $cgi, $org, \@out, $tool );
1448 : efrank 1.1 }
1449 : golsen 1.45
1450 :     elsif ( $tool eq 'blastp against complete genomes' ) ### this tool gets nonstandard treatment: RAO
1451 : overbeek 1.30 {
1452 : golsen 1.103 &blast_complete( $fig, $cgi, $html, $tmp_seq, $query, $seq );
1453 : golsen 1.91 unlink( $tmp_seq );
1454 :     return;
1455 : overbeek 1.30 }
1456 : golsen 1.45
1457 : overbeek 1.17 if (@out < 1) # This is really a bigger problem than no hits (GJO)
1458 : efrank 1.1 {
1459 : golsen 1.88 push @$html, $cgi->h1( "Sorry, no blast output" );
1460 : efrank 1.1 }
1461 :     else
1462 :     {
1463 : golsen 1.88 push @$html, $cgi->pre, @out, $cgi->end_pre;
1464 : efrank 1.1 }
1465 : golsen 1.45 unlink( $tmp_seq );
1466 : efrank 1.1 }
1467 :    
1468 : golsen 1.141 #==============================================================================
1469 :     # Identical SEED proteins
1470 :     #==============================================================================
1471 :    
1472 :     sub identical_seed_proteins {
1473 :     my( $fig, $cgi, $html, $seq, $user ) = @_;
1474 :    
1475 :     #--------------------------------------------------------------------------
1476 :     # Is the request for an id? Get the MD5
1477 :     #--------------------------------------------------------------------------
1478 :    
1479 :     my( $query, $md5, @out );
1480 :     if ( ( $query ) = $seq =~ /^\s*([a-zA-Z]{2,4}\|\S+)/ )
1481 :     {
1482 :     # Replaced $id with $query so that output inherits label -- GJO
1483 :     # Found ugly fairure to build correct query sequence for
1484 :     # 'blastp against complete genomes'. Can't figure out
1485 :     # why it ever worked with an id -- GJO
1486 :    
1487 :     $md5 = $fig->md5_of_peg( $query );
1488 :     }
1489 :    
1490 :     #--------------------------------------------------------------------------
1491 :     # It is a sequence. Compute the MD5.
1492 :     #--------------------------------------------------------------------------
1493 :    
1494 :     else
1495 :     {
1496 :     $query = ( $seq =~ s/^\s*>([^\n\012\015]*)// ) ? $1 : 'query';
1497 :     $seq =~ s/[^A-Za-z]+//g;
1498 :     $md5 = $seq ? Digest::MD5::md5_hex( uc $seq ) : '';
1499 :     }
1500 :    
1501 :     if ( ! $md5 )
1502 :     {
1503 :     push( @$html, $cgi->h2( "Sorry, could not get sequence for $query" ) );
1504 :     return;
1505 :     }
1506 :    
1507 :     my $col_hdrs = [ 'ID', 'Genome', 'Function' ];
1508 :    
1509 :     my @rows = map { [ $_->[1], [ $_->[2], "TD BgColor=$_->[3]" ], $_->[4] ] }
1510 :     sort { lc $a->[2] cmp lc $b->[2]
1511 :     || $a->[5] <=> $b->[5]
1512 :     || $a->[6] <=> $b->[6]
1513 :     || $a->[7] <=> $b->[7]
1514 :     }
1515 :     map { [ $_, # fid
1516 :     HTML::fid_link( $cgi, $_ ), # fid_link
1517 :     $fig->org_and_color_of( $_ ), # genus_species, html_color
1518 :     scalar $fig->function_of( $_ ), # func
1519 :     /\|(\d+)\.(\d+)\.[^.]+\.(\d+)/ # taxid, genver, pegnum
1520 :     ]
1521 :     }
1522 :     grep { ! $fig->is_deleted_fid( $_ ) }
1523 :     $fig->pegs_with_md5( $md5 );
1524 :    
1525 :     if ( ! @rows )
1526 :     {
1527 :     push( @$html, $cgi->h2( "No SEED proteins identical to $query" ) );
1528 :     return;
1529 :     }
1530 :    
1531 :     push( @$html, $cgi->br, $cgi->br, "\n",
1532 :     &HTML::make_table($col_hdrs, \@rows, "SEED proteins identical to $query" )
1533 :     );
1534 :     return;
1535 :     }
1536 :    
1537 : golsen 1.126 #
1538 :     # @blast_text = remove_deleted_fids( $fig, @blast_text )
1539 :     #
1540 :     # The blast datebases include all the proteins, including those deleted.
1541 :     # This is a text filter to remove those deleted. Requires 3 states:
1542 :     #
1543 :     # $delete
1544 :     # 0 Pass the line
1545 :     # 1 Delete current line
1546 :     # 2 Delete until next subject sequence
1547 :     #
1548 :     sub remove_deleted_fids
1549 :     {
1550 :     my $fig = shift;
1551 :     my $delete = 0;
1552 :    
1553 :     grep { if ( /^(fig\|\d+\.\d+\.[^.]+\.\d+)/ )
1554 :     {
1555 :     $delete = $fig->is_deleted_fid( $1 ) ? 1 : 0;
1556 :     }
1557 :     elsif ( /^>(fig\|\d+\.\d+\.[^.]+\.\d+)/ )
1558 :     {
1559 :     $delete = $fig->is_deleted_fid( $1 ) ? 2 : 0;
1560 :     }
1561 :     elsif ( $delete == 1 ) # Deleted 1 previous line
1562 :     {
1563 :     $delete = 0;
1564 :     }
1565 :     elsif ( /^ +Database: / ) # No more subject sequences
1566 :     {
1567 :     $delete = 0;
1568 :     }
1569 :     ! $delete # If we don't want to delete, pass the line
1570 :     } @_;
1571 :     }
1572 : golsen 1.45
1573 : golsen 1.88 # `$blastall -p $prog -i $tmp_seq -d $db $blast_opt`
1574 :     # execute_blastall( $prog, $input_file, $db, $options )
1575 :    
1576 :     sub execute_blastall
1577 :     {
1578 :     my( $prog, $input, $db, $options ) = @_;
1579 :    
1580 :     my $blastall = "$FIG_Config::ext_bin/blastall";
1581 :     my @args = ( '-p', $prog, '-i', $input, '-d', $db, split(/\s+/, $options) );
1582 :    
1583 :     my $bfh;
1584 :     my $pid = open( $bfh, "-|" );
1585 :     if ( $pid == 0 )
1586 :     {
1587 :     exec( $blastall, @args );
1588 :     die join( " ", $blastall, @args, "failed: $!" );
1589 :     }
1590 :    
1591 : golsen 1.91 <$bfh>
1592 : golsen 1.88 }
1593 :    
1594 :    
1595 : golsen 1.91 # Changed to:
1596 :     # Include low complexity filter in blast search.
1597 :     # Remove all but first match to a given database sequence.
1598 :     # Sort by bit-score, not E-value (which becomes equal for all strong matches).
1599 :     # Limit to 1000 matches.
1600 :     # -- GJO
1601 : golsen 1.126 #
1602 :     # Removal of deleted fids is in format_sims (really belongs here).
1603 :     # Function to remove duplicates is duplicated in format_sims.
1604 :     # Limit of output to 1000 is also in format_sims.
1605 : golsen 1.91
1606 :     sub blast_complete
1607 :     {
1608 : golsen 1.103 my( $fig, $cgi, $html, $seqfile, $query, $seq ) = @_;
1609 : golsen 1.88 my( $genome, @sims );
1610 :    
1611 : overbeek 1.30 @sims = ();
1612 : golsen 1.91 foreach $genome ( $fig->genomes("complete") )
1613 : overbeek 1.30 {
1614 : parrello 1.75 my $db = "$FIG_Config::organisms/$genome/Features/peg/fasta";
1615 :     next if (! -s $db);
1616 : overbeek 1.30
1617 : parrello 1.75 &verify_db($db,"p");
1618 :     my $sim;
1619 : golsen 1.91 my %seen = ();
1620 :     push @sims, map { chomp;
1621 :     $sim = [ split /\t/ ];
1622 :     $sim->[10] =~ s/^e-/1.0e-/;
1623 :     $seen{ $sim->[1] }++ ? () : $sim
1624 :     }
1625 :     execute_blastall( 'blastp', $seqfile, $db, '-m 8 -F T -e 1e-5' );
1626 : overbeek 1.30 }
1627 : golsen 1.91
1628 :     @sims = sort { $b->[11] <=> $a->[11] } @sims;
1629 :     if ( @sims > 1000 ) { @sims = @sims[0 .. 999] }
1630 : golsen 1.103 &format_sims( $fig, $cgi, $html, \@sims, $query, $seq );
1631 : overbeek 1.30 }
1632 :    
1633 : golsen 1.65
1634 :     #------------------------------------------------------------------------------
1635 : golsen 1.97 # Graphically display search results on contigs
1636 : golsen 1.65 #
1637 :     # use FIGjs qw( toolTipScript );
1638 :     # use GenoGraphics qw( render );
1639 :     #------------------------------------------------------------------------------
1640 :     #
1641 : golsen 1.97 # Fields produced by next_blast_hsp:
1642 :     #
1643 :     # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1644 :     # qid qdef qlen sid sdef slen scr e_val p_n p_val n_mat n_id n_pos n_gap dir q1 q2 qseq s1 s2 sseq
1645 : golsen 1.65 #------------------------------------------------------------------------------
1646 :    
1647 :     sub blast_graphics {
1648 :     my ( $fig_or_sprout, $cgi, $genome, $out, $tool ) = @_;
1649 :    
1650 :     my $e_min = 0.1;
1651 :     my $gg = [];
1652 : golsen 1.97 my @html = ();
1653 : golsen 1.65
1654 : golsen 1.97 # Changed to use standalone parsing function, not shell script -- GJO
1655 : golsen 1.65
1656 : golsen 1.97 my $outcopy = [ @$out ];
1657 :     while ( $_ = &gjoparseblast::next_blast_hsp( $outcopy ) )
1658 : golsen 1.65 {
1659 : golsen 1.97 my ( $qid, $qlen, $contig, $slen ) = @$_[0, 2, 3, 5 ];
1660 :     my ( $e_val, $n_mat, $n_id, $q1, $q2, $s1, $s2 ) = @$_[ 7, 10, 11, 15, 16, 18, 19 ];
1661 :     next if $e_val > $e_min;
1662 :     my ( $genes, $min, $max ) = hsp_context( $fig_or_sprout, $cgi, $genome,
1663 :     $e_val, 100 * $n_id / $n_mat,
1664 :     $qid, $q1, $q2, $qlen,
1665 :     $contig, $s1, $s2, $slen
1666 :     );
1667 :     if ($min && $max)
1668 : golsen 1.65 {
1669 : golsen 1.97 push @$gg, [ substr( $contig, 0, 18 ), $min, $max, $genes ];
1670 : golsen 1.65 }
1671 : golsen 1.97 }
1672 : golsen 1.65
1673 : golsen 1.97 # $gene = [ $beg, $end, $shape, $color, $text, $url, $pop-up, $alt_action, $pop-up_title ];
1674 :     # $genes = [ $gene, $gene, ... ];
1675 :     # $map = [ $label, $min_coord, $max_coord, $genes ];
1676 :     # $gg = [ $map, $map, ... ];
1677 :     # render( $gg, $width, $obj_half_heigth, $save, $img_index_number )
1678 :    
1679 :     if ( @$gg )
1680 :     {
1681 :     # print STDERR Dumper( $gg );
1682 :     my $gs = $fig_or_sprout->genus_species( $genome );
1683 :     my $space = "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;";
1684 :     my $legend = "<TABLE>\n"
1685 :     . " <TR>\n"
1686 :     . " <TD>Q = Query sequence$space</TD>\n"
1687 :     . " <TD Bgcolor='#FF0000'>$space</TD><TD>Frame 1 translation$space</TD>\n"
1688 :     . " <TD Bgcolor='#00FF00'>$space</TD><TD>Frame 2 translation$space</TD>\n"
1689 :     . " <TD Bgcolor='#0000FF'>$space</TD><TD>Frame 3 translation$space</TD>\n"
1690 :     . " <TD Bgcolor='#808080'>$space</TD><TD>Untranslated feature</TD>\n"
1691 :     . " </TR>\n"
1692 :     . "</TABLE><P />";
1693 :    
1694 :     push @html, "\n", FIGjs::toolTipScript(), "\n",
1695 :     $cgi->h2( "Results of $tool search of contigs from $gs\n"),
1696 :     $legend,
1697 :     @{ GenoGraphics::render( $gg, 600, 4, 0, 1 ) },
1698 :     $cgi->hr, "\n";
1699 : golsen 1.65 }
1700 :    
1701 :     return @html;
1702 :     }
1703 :    
1704 :    
1705 :     sub hsp_context {
1706 :     my( $fig_or_sprout, $cgi, $genome, $e_val, $pct_id,
1707 :     $qid, $q1, $q2, $qlen,
1708 : parrello 1.75 $contig, $s1, $s2, $slen ) = @_;
1709 : golsen 1.65 my $half_sz = 5000;
1710 :    
1711 :     my( $from, $to, $features, $fid, $beg, $end );
1712 :     my( $link, $lbl, $isprot, $function, $uniprot, $info, $prot_query );
1713 :    
1714 :     my $user = $cgi->param( 'user' ) || "";
1715 :     my $sprout = $cgi->param( 'SPROUT' ) ? '&SPROUT=1' : '';
1716 :    
1717 :     my @genes = ();
1718 :    
1719 :     # Based on the match position of the query, select the context region:
1720 :    
1721 :     ( $from, $to ) = ( $s1 <= $s2 ) ? ( $s1 - $half_sz, $s2 + $half_sz )
1722 :     : ( $s2 - $half_sz, $s1 + $half_sz );
1723 :     $from = 1 if ( $from < 1 );
1724 :     $to = $slen if ( $to > $slen );
1725 :    
1726 :     # Get the genes in the region, and adjust the ends to include whole genes:
1727 :    
1728 :     ( $features, $from, $to ) = genes_in_region( $fig_or_sprout, $cgi, $genome, $contig, $from, $to );
1729 :    
1730 : golsen 1.103 # Fix the end points if features have moved them to exclude query:
1731 :    
1732 :     if ( $s1 < $s2 ) { $from = $s1 if $s1 < $from; $to = $s2 if $s2 > $to }
1733 :     else { $from = $s2 if $s2 < $from; $to = $s1 if $s1 > $to }
1734 : golsen 1.65
1735 :     # Add the other features:
1736 :    
1737 :     foreach $fid ( @$features )
1738 :     {
1739 : parrello 1.75 my $contig1;
1740 :     ( $contig1, $beg, $end ) = boundaries_of( $fig_or_sprout, feature_locationS( $fig_or_sprout, $fid ) );
1741 :     next if $contig1 ne $contig;
1742 :    
1743 :     $link = "";
1744 :     if ( ( $lbl ) = $fid =~ /peg\.(\d+)$/ ) {
1745 : overbeek 1.82 ( $link = $cgi->url(-relative => 1) ) =~ s/index\.cgi/protein.cgi/;
1746 : parrello 1.75 $link .= "?prot=$fid&user=$user$sprout";
1747 :     $isprot = 1;
1748 :     } elsif ( ( $lbl ) = $fid =~ /\.([a-z]+)\.\d+$/ ) {
1749 :     $lbl = uc $lbl;
1750 :     $isprot = 0;
1751 :     } else {
1752 :     $lbl = "";
1753 :     $isprot = 0;
1754 :     }
1755 :    
1756 :     $function = function_ofS( $fig_or_sprout, $fid );
1757 :    
1758 :     $uniprot = join ", ", grep { /^uni\|/ } feature_aliasesL( $fig_or_sprout, $fid);
1759 :    
1760 :     $info = join( '<br />', "<b>Feature:</b> $fid",
1761 :     "<b>Contig:</b> $contig",
1762 :     "<b>Begin:</b> $beg",
1763 :     "<b>End:</b> $end",
1764 :     $function ? "<b>Function:</b> $function" : '',
1765 :     $uniprot ? "<b>Uniprot ID:</b> $uniprot" : ''
1766 :     );
1767 :    
1768 : golsen 1.97 # $gene = [ $beg, $end, $shape, $color, $text, $url, $pop-up, $alt_action, $pop-up_title ];
1769 :    
1770 : parrello 1.75 push @genes, [ feature_graphic( $beg, $end, $isprot ),
1771 :     $lbl, $link, $info,
1772 :     $isprot ? () : ( undef, "Feature information" )
1773 :     ];
1774 : golsen 1.65 }
1775 :    
1776 :     # Draw the query. The subject coordinates are always DNA. If the query
1777 :     # is protein, it is about 3 times shorter than the matching contig DNA.
1778 :     # Splitting the difference, if 1.7 times the query length is still less
1779 :     # than the subject length, we will call it a protein query (and reading
1780 :     # frame in the contig coordinates has meaning). If it is nucleotides,
1781 :     # there is no defined frame.
1782 :    
1783 :     $info = join( '<br />', $qid ne 'query ' ? "<b>Query:</b> $qid" : (),
1784 :     "<b>Length:</b> $qlen",
1785 :     "<b>E-value:</b> $e_val",
1786 :     "<b>% identity:</b> " . sprintf( "%.1f", $pct_id ),
1787 :     "<b>Region of similarity:</b> $q1 &#150; $q2"
1788 :     );
1789 :     $prot_query = ( 1.7 * abs( $q2 - $q1 ) < abs( $s2 - $s1 ) ) ? 1 : 0;
1790 :    
1791 : golsen 1.104 if ( $user && $prot_query )
1792 : golsen 1.97 {
1793 :     $link = $cgi->url(-relative => 1);
1794 :     $link =~ s/index\.cgi/propose_new_peg.cgi/;
1795 :     $link .= "?user=$user&genome=$genome&covering=${contig}_${s1}_${s2}";
1796 :     }
1797 :     else
1798 :     {
1799 :     $link = undef;
1800 :     }
1801 :    
1802 : golsen 1.65 push @genes, [ feature_graphic( $s1, $s2, $prot_query ),
1803 : golsen 1.97 'Q', $link, $info, undef, 'Query and match information'
1804 : golsen 1.65 ];
1805 :    
1806 :     return \@genes, $from, $to;
1807 :     }
1808 :    
1809 :    
1810 :     sub feature_graphic {
1811 :     my ( $beg, $end, $isprot ) = @_;
1812 :     my ( $min, $max, $symb, $color );
1813 :    
1814 :     ( $min, $max, $symb ) = ( $beg <= $end ) ? ( $beg, $end, "rightArrow" )
1815 :     : ( $end, $beg, "leftArrow" );
1816 :    
1817 :     # Color proteins by translation frame
1818 :    
1819 :     $color = $isprot ? qw( blue red green )[ $beg % 3 ] : 'grey';
1820 :    
1821 :     ( $min, $max, $symb, $color );
1822 :     }
1823 :    
1824 :    
1825 :     sub genes_in_region {
1826 :     my( $fig_or_sprout, $cgi, $genome, $contig, $min, $max ) = @_;
1827 :    
1828 :     if ( $cgi->param( 'SPROUT' ) )
1829 :     {
1830 : parrello 1.75 my( $x, $feature_id );
1831 :     my( $feat, $min, $max ) = $fig_or_sprout->genes_in_region( $genome, $contig, $min, $max );
1832 :     my @tmp = sort { ($a->[1] cmp $b->[1]) or
1833 :     (($a->[2]+$a->[3]) <=> ($b->[2]+$b->[3]))
1834 :     }
1835 :     map { $feature_id = $_;
1836 :     $x = feature_locationS( $fig_or_sprout, $feature_id );
1837 :     $x ? [ $feature_id, boundaries_of( $fig_or_sprout, $x )] : ()
1838 :     }
1839 :     @$feat;
1840 :     return ( [map { $_->[0] } @tmp ], $min, $max );
1841 : golsen 1.65 }
1842 :     else
1843 :     {
1844 : parrello 1.75 return $fig_or_sprout->genes_in_region( $genome, $contig, $min, $max );
1845 : golsen 1.65 }
1846 :     }
1847 :    
1848 :    
1849 :     sub feature_locationS {
1850 :     my ( $fig_or_sprout, $peg ) = @_;
1851 :     scalar $fig_or_sprout->feature_location( $peg );
1852 :     }
1853 :    
1854 :    
1855 :     sub boundaries_of {
1856 :     my( $fig_or_sprout, $loc ) = @_;
1857 :     $fig_or_sprout->boundaries_of( $loc );
1858 :     }
1859 :    
1860 :    
1861 :     sub function_ofS {
1862 :     my( $fig_or_sprout, $peg, $user ) = @_;
1863 :     scalar $fig_or_sprout->function_of( $peg, $user );
1864 :     }
1865 :    
1866 :    
1867 :     sub feature_aliasesL {
1868 :     my( $fig_or_sprout, $fid ) = @_;
1869 :     my @tmp = $fig_or_sprout->feature_aliases( $fid );
1870 :     @tmp
1871 :     }
1872 :    
1873 :    
1874 : overbeek 1.30 sub format_sims {
1875 : golsen 1.141 my( $fig, $cgi, $html, $sims, $query, $seq, $opts ) = @_;
1876 :     $opts = {} unless $opts && ( ref $opts eq 'HASH' );
1877 :    
1878 : golsen 1.103 my( $col_hdrs, $table, @ids, $ids, $sim, %seen, $n, $fid );
1879 : overbeek 1.30
1880 :     $col_hdrs = [ "Select up to here",
1881 : parrello 1.75 "Similar sequence",
1882 :     "E-val",
1883 :     "Function",
1884 :     "Organism",
1885 :     "Aliases"
1886 :     ];
1887 : overbeek 1.30
1888 :     $table = [];
1889 :     @ids = ();
1890 : golsen 1.103 $n = 0; # Count reported sequences
1891 :     foreach $sim ( @$sims )
1892 : overbeek 1.30 {
1893 : golsen 1.103 $fid = $sim->[1];
1894 :     next if $seen{ $fid }++; # One hit per sequence
1895 :     next if $fig->is_deleted_fid( $fid ); # Hide deleted sequences
1896 :     my $alii = scalar $fig->feature_aliases( $fid );
1897 :     $alii =~ s/,/, /g;
1898 :     push( @$table, [ $cgi->checkbox( -name => 'list_to',
1899 :     -value => $fid,
1900 :     -override => 1,
1901 :     -checked => 0,
1902 :     -label => ""
1903 :     ),
1904 :     &HTML::fid_link( $cgi, $fid ),
1905 :     [ $sim->[10], "TD NoWrap" ],
1906 :     scalar $fig->function_of( $fid ),
1907 :     $fig->genus_species( $fig->genome_of( $fid ) ),
1908 :     $alii
1909 :     ]
1910 :     );
1911 :     push( @ids, $fid );
1912 :     last if ++$n >= 1000; # Stop after 1000
1913 : overbeek 1.30 }
1914 : golsen 1.103
1915 : overbeek 1.30 $ids = join(",",@ids);
1916 : golsen 1.103 push( @$html, $cgi->start_form( -method => 'post',
1917 : golsen 1.141 -target => '_blank',
1918 : olson 1.89 -action => $this_script
1919 : parrello 1.75 ),
1920 : golsen 1.103 $cgi->hidden(-name => 'ids', -value => $ids),
1921 :     $cgi->hidden(-name => 'qid', -value => $query),
1922 :     $cgi->hidden(-name => 'qseq', -value => $seq),
1923 :     $cgi->submit('Extract Matched Sequences'),
1924 :     # $cgi->submit('Align Matched Sequences'),
1925 :     &HTML::make_table($col_hdrs,$table,"Best Hits"),
1926 :     $cgi->submit('Extract Matched Sequences'),
1927 :     # $cgi->submit('Align Matched Sequences'),
1928 :     $cgi->end_form
1929 :     );
1930 : overbeek 1.30 }
1931 : overbeek 1.17
1932 : golsen 1.103
1933 : golsen 1.141 #
1934 :     # Verify existence of blast database for a sequence file named $db.
1935 :     # Allow existence of formatted database of correct name, without a source
1936 :     # file. $type is 'p' or 'n' for protein (default) and nucleotide databases,
1937 :     # respectively.
1938 :     #
1939 : efrank 1.1 sub verify_db {
1940 : golsen 1.141 my( $db, $type ) = @_;
1941 :    
1942 :     # Need a database name
1943 :     return 0 unless defined( $db ) && length( $db );
1944 :    
1945 :     # Default type is protein
1946 :     $type ||= 'p';
1947 :     my ( $prot, $db_file ) = $type =~ /^p/i ? ( 'T', "$db.psq" )
1948 :     : ( 'F', "$db.nsq" );
1949 :    
1950 :     # Check for database
1951 :     return 1 if ( -s $db_file ) && ( ( ! -f $db ) || ( -M $db_file <= -M $db ) );
1952 :    
1953 :     # If there is no up-to-date blast database, we need sequences to build it.
1954 :     return 0 unless -s $db;
1955 :    
1956 :     # Run formatdb and check that the file now exists
1957 :     my $formatdb = "$FIG_Config::ext_bin/formatdb";
1958 :     return system( $formatdb, -p => $prot, -i => $db ) ? 0 : -s $db_file;
1959 :     }
1960 : efrank 1.1
1961 : overbeek 1.7
1962 :     sub export_assignments {
1963 :     my($fig,$cgi,$html,$who) = @_;
1964 :     my($genome,$x);
1965 :    
1966 : olson 1.124 my @genomes = map { $_ =~ /(\d+\.\d+)/; $1 } $cgi->param('korgs');
1967 : overbeek 1.7
1968 :     if (@genomes == 0)
1969 :     {
1970 : parrello 1.75 @genomes = $fig->genomes;
1971 : overbeek 1.7 }
1972 :    
1973 : overbeek 1.10 my @assignments = $fig->assignments_made(\@genomes,$who,$cgi->param('after_date'));
1974 : overbeek 1.7 if (@assignments == 0)
1975 :     {
1976 : parrello 1.75 push(@$html,$cgi->h1("Sorry, no assignments where made by $who"));
1977 : overbeek 1.7 }
1978 :     else
1979 :     {
1980 : parrello 1.75 my $col_hdrs = ["FIG id", "External ID", "Genus/Species","Assignment"];
1981 :     my $tab = [];
1982 :     my($x,$peg,$func);
1983 :     foreach $x (@assignments)
1984 :     {
1985 :     ( $peg, $func ) = @$x;
1986 :     push( @$tab,[ HTML::set_prot_links( $cgi, $peg ),
1987 :     HTML::set_prot_links( $cgi, ext_id( $fig, $peg ) ),
1988 :     $fig->genus_species($fig->genome_of($peg)),
1989 :     $func
1990 :     ] );
1991 :     }
1992 :    
1993 :     if ($cgi->param('save_assignments'))
1994 :     {
1995 :     my $user = $cgi->param('save_user');
1996 :     if ($user)
1997 :     {
1998 :     &FIG::verify_dir("$FIG_Config::data/Assignments/$user");
1999 :     my $file = &FIG::epoch_to_readable(time) . ":$who:exported_from_local_SEED";
2000 :     if (open(TMP,">$FIG_Config::data/Assignments/$user/$file"))
2001 :     {
2002 :     print TMP join("",map { join("\t",@$_) . "\n" } map { [$_->[0],$_->[3]] } @$tab);
2003 :     close(TMP);
2004 :     }
2005 :     push(@$html,$cgi->h1("Saved Assignment Set $file"));
2006 :     }
2007 :     else
2008 :     {
2009 :     push(@$html,$cgi->h1("You need to specify a user to save an assignment set"));
2010 :     }
2011 :     }
2012 :    
2013 :     if ($cgi->param('tabs'))
2014 :     {
2015 :     print $cgi->header;
2016 :     print "<pre>\n";
2017 :     print join("",map { join("\t",@$_) . "\n" } @$tab);
2018 :     print "</pre>\n";
2019 : olson 1.137 &done;
2020 : parrello 1.75 }
2021 :     else
2022 :     {
2023 :     push(@$html,&HTML::make_table($col_hdrs,$tab,"Assignments Made by $who"));
2024 :     }
2025 : overbeek 1.7 }
2026 :     }
2027 :    
2028 :     sub ext_id {
2029 :     my($fig,$peg) = @_;
2030 :    
2031 :     my @mapped = grep { $_ !~ /^fig/ } map { $_->[0] } $fig->mapped_prot_ids($peg);
2032 :     if (@mapped == 0)
2033 :     {
2034 : parrello 1.75 return $peg;
2035 : overbeek 1.7 }
2036 :    
2037 :     my @tmp = ();
2038 :     if ((@tmp = grep { $_ =~ /^sp/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
2039 :     if ((@tmp = grep { $_ =~ /^pir/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
2040 :     if ((@tmp = grep { $_ =~ /^gi/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
2041 :     if ((@tmp = grep { $_ =~ /^tr/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
2042 :     if ((@tmp = grep { $_ =~ /^tn/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
2043 :     if ((@tmp = grep { $_ =~ /^kegg/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
2044 :    
2045 :     return $peg;
2046 :     }
2047 :    
2048 : overbeek 1.28 sub translate_assignments {
2049 :     my($fig,$cgi,$html,$from_func,$to_func) = @_;
2050 :    
2051 : overbeek 1.56 my @funcs = grep { $_ =~ /^\S.*\S$/ } split(/[\012\015]+/,$from_func);
2052 :    
2053 : overbeek 1.28 my $user = $cgi->param('save_user');
2054 :     if ($user)
2055 :     {
2056 :     &FIG::verify_dir("$FIG_Config::data/Assignments/$user");
2057 :     my $file = &FIG::epoch_to_readable(time) . ":$user:translation";
2058 :     if (open(TMP,">$FIG_Config::data/Assignments/$user/$file"))
2059 :     {
2060 : overbeek 1.55 my($peg,$func);
2061 : overbeek 1.28
2062 : overbeek 1.56 foreach $from_func (@funcs)
2063 : overbeek 1.28 {
2064 : overbeek 1.57 my $from_funcQ = quotemeta $from_func;
2065 :    
2066 : overbeek 1.56 foreach $peg ($fig->seqs_with_role($from_func))
2067 : overbeek 1.28 {
2068 : overbeek 1.56 if ($peg =~ /^fig\|/)
2069 : overbeek 1.28 {
2070 : overbeek 1.56 $func = $fig->function_of($peg);
2071 : overbeek 1.125 my $comment = "";
2072 :     if ($func =~ /^([^\#]*)(\#.*)$/)
2073 :     {
2074 :     $comment = $2;
2075 :     $func = $1;
2076 :     $func =~ s/\s+$//;
2077 :     $comment = $comment ? " $comment" : "";
2078 :     }
2079 :    
2080 : overbeek 1.56 if ($func eq $from_func)
2081 :     {
2082 : overbeek 1.125 print TMP "$peg\t$to_func$comment\n";
2083 : overbeek 1.56 }
2084 : overbeek 1.73 else
2085 : overbeek 1.57 {
2086 : overbeek 1.73 my @pieces = grep { $_ } split(/(\s+[\/@]\s+)|(\s*;\s+)/,$func);
2087 :     if (@pieces > 1)
2088 :     {
2089 :     my $func1 = join("",map { $_ =~ s/^$from_funcQ$/$to_func/; $_ } @pieces);
2090 :     if ($func ne $func1)
2091 :     {
2092 : overbeek 1.125 print TMP "$peg\t$func1$comment\n";
2093 : overbeek 1.73 }
2094 :     }
2095 : overbeek 1.57 }
2096 : overbeek 1.28 }
2097 :     }
2098 :     }
2099 :     close(TMP);
2100 :     }
2101 :     push(@$html,$cgi->h1("Saved Assignment Set $file"));
2102 :     }
2103 :     else
2104 :     {
2105 : parrello 1.75 push(@$html,$cgi->h1("You need to specify a user to save an assignment set"));
2106 : overbeek 1.28 }
2107 :     }
2108 : efrank 1.92
2109 :     sub find_pegs_by_cv1 {
2110 :     my ($fig, $cgi, $html, $user, $pattern, $cv) = @_;
2111 :    
2112 :     # Remember kind of search that got us hear so we can call back
2113 :     # with same kind
2114 :     my $search = "Search";
2115 :     if ($cgi->param('Search genome selected below')) {
2116 :     $search=uri_escape('Search genome selected below');
2117 :     } elsif ( $cgi->param('Search Selected Organisms') ) {
2118 :     $search = uri_escape('Search Selected Organisms');
2119 :     } elsif ( $cgi->param('Find Genes in Org that Might Play the Role') ) {
2120 :     $search = uri_escape('Find Genes in Org that Might Play the Role');
2121 :     }
2122 :    
2123 :     my $search_results = $fig->search_cv_file($cv, $pattern);
2124 :    
2125 :     my $find_col_hdrs = ["Find","Vocab. Name","ID; Term"];
2126 :     my $find_table_rows;
2127 :     my $counter = 0;
2128 :     for my $r (@$search_results)
2129 :     {
2130 :     my @temp = split("\t",$r);
2131 :     my $row = [];
2132 :     my $id= $temp[1];
2133 :     my $term = $temp[2];
2134 :     my $id_and_term = $id."; ".$term;
2135 :     my $pattern=uri_escape("$id; $term");
2136 :    
2137 :     my $link = "index.cgi?pattern=$pattern&Search=1&user=$user";
2138 :     my $cb = "<a href=$link>Find PEGs</a>";
2139 :    
2140 :     #feh my $cb = $cgi->submit(-name=>'$search', -value=>'Find PEGs');
2141 :     #my $cb_value = $cv."split_here".$id."; ".$term;
2142 :     #my $cb ="<input type=checkbox name=find_checked_$counter value='$cb_value'>" ;
2143 :     push(@$row,$cb);
2144 :     push(@$row,$cv);
2145 :     push(@$row,$id_and_term);
2146 :     push(@$find_table_rows,$row);
2147 :     $counter = $counter + 1;
2148 :     }
2149 :    
2150 :     my $find_terms_button="";
2151 :     if ($counter > 0) {
2152 :     $find_terms_button= $cgi->submit(-name=>'$search', -value=>'$search');
2153 :     }
2154 :    
2155 :     # build the page
2156 :     push @$html,
2157 :     $cgi->start_form(),
2158 :     $cgi->hidden(-name=>'user', -value=>'$user'),
2159 :     $cgi->br,
2160 :     "<h2>Search for PEGs annotated with Contrlled Vocabulary Terms</h2>",
2161 :     $cgi->hr,
2162 :     "<h4>Terms Matching Your Criteria </h4>\n",
2163 :     $cgi->br,
2164 :     &HTML::make_table($find_col_hdrs,$find_table_rows),
2165 :     $cgi->br,
2166 :     $find_terms_button,
2167 :     $cgi->end_form;
2168 :    
2169 :     return $html;
2170 :     }
2171 :    
2172 :     sub find_pegs_by_cv {
2173 :     my ($fig, $cgi, $html, $user, $pattern, $cv) = @_;
2174 :    
2175 :     # Remember kind of search that got us hear so we can call back
2176 :     # with same kind (not working so force to simple Search)
2177 :    
2178 :     my $search = "Search";
2179 :    
2180 :     #if ($cgi->param('Search genome selected below')) {
2181 :     # $search='Search genome selected below';
2182 :     #} elsif ( $cgi->param('Search Selected Organisms') ) {
2183 :     # $search = 'Search Selected Organisms';
2184 :     #} elsif ( $cgi->param('Find Genes in Org that Might Play the Role') ) {
2185 :     # $search = 'Find Genes in Org that Might Play the Role';
2186 :     #}
2187 :    
2188 :     my $search_results = $fig->search_cv_file($cv, $pattern);
2189 :    
2190 :     my $find_col_hdrs = ["Find","Vocab. Name","ID; Term"];
2191 :     my @patterns=();
2192 :     for my $r (@$search_results)
2193 :     {
2194 :     my @temp = split("\t",$r);
2195 :     my $id= $temp[1];
2196 :     my $term = $temp[2];
2197 :     my $pattern="$id; $term";
2198 :    
2199 :     push(@patterns,$pattern);
2200 :     }
2201 :    
2202 :     my @pattern_radio;
2203 :     if ($#patterns + 1) {
2204 :     @pattern_radio = $cgi->radio_group( -name => 'pattern',
2205 :     -values => [ @patterns ]
2206 :     );
2207 :     } else {
2208 :     @pattern_radio = ("Nothing found");
2209 :     }
2210 :    
2211 :     my $find_terms_button= $cgi->submit(-name=>"Search", -value=>"Search");
2212 :    
2213 :     # build the page
2214 :     push @$html,
2215 :     $cgi->start_form(),
2216 :     $cgi->hidden(-name=>'user', -value=>'$user'),
2217 :     $cgi->br,
2218 :     "<h2>Search for PEGs annotated with Contrlled Vocabulary Terms</h2>",
2219 :     $cgi->hr,
2220 :     "<h4>$cv Terms Matching Your Criteria </h4>\n",
2221 :     $cgi->br,
2222 :     $find_terms_button,
2223 :     $cgi->br,
2224 :     $cgi->br,
2225 :     join( "<br>", @pattern_radio),
2226 :     # &HTML::make_table($find_col_hdrs,$find_table_rows),
2227 :     $cgi->br,
2228 :     $find_terms_button,
2229 :     $cgi->end_form;
2230 :    
2231 :     return $html;
2232 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3