[Bio] / FigWebServices / index.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/index.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.80 - (view) (download)

1 : overbeek 1.49 ### start
2 :    
3 : efrank 1.1 use FIG;
4 :    
5 : golsen 1.65 use strict;
6 : parrello 1.78 use Tracer;
7 : golsen 1.65 use FIGjs qw( toolTipScript );
8 :     use GenoGraphics qw( render );
9 :     use IPC::Open2 qw( open2 );
10 :    
11 : olson 1.36 use POSIX;
12 : efrank 1.1 use HTML;
13 : redwards 1.80 use raelib; # note this is only being used for the cookies at the moment, and this can be removeed if necessary
14 :     my $raelib=new raelib;
15 : golsen 1.65
16 : efrank 1.1 use CGI;
17 :     my $cgi = new CGI;
18 :    
19 : olson 1.54 my $fig;
20 :     eval {
21 :     $fig = new FIG;
22 :     };
23 :    
24 :     if ($@ ne "")
25 :     {
26 :     my $err = $@;
27 :    
28 :     my(@html);
29 :    
30 :     push(@html, $cgi->p("Error connecting to SEED database."));
31 :     if ($err =~ /Could not connect to DBI:.*could not connect to server/)
32 :     {
33 : parrello 1.75 push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
34 : olson 1.54 }
35 :     else
36 :     {
37 : parrello 1.75 push(@html, $cgi->pre($err));
38 : olson 1.54 }
39 :     &HTML::show_page($cgi, \@html, 1);
40 :     exit;
41 :     }
42 : parrello 1.78 Trace("Connected to FIG.") if T(2);
43 : overbeek 1.28 my($map,@orgs,$user,$map,$org,$made_by,$from_func,$to_func);
44 : efrank 1.1
45 :     if (0)
46 :     {
47 : overbeek 1.50 my $VAR1;
48 :     eval(join("",`cat /tmp/index_parms`));
49 :     $cgi = $VAR1;
50 :     # print STDERR &Dumper($cgi);
51 :     }
52 :    
53 :     if (0)
54 :     {
55 : efrank 1.1 print $cgi->header;
56 :     my @params = $cgi->param;
57 :     print "<pre>\n";
58 :     foreach $_ (@params)
59 :     {
60 : parrello 1.75 print "$_\t:",join(",",$cgi->param($_)),":\n";
61 : efrank 1.1 }
62 : overbeek 1.50
63 :     if (0)
64 : efrank 1.1 {
65 : parrello 1.75 if (open(TMP,">/tmp/index_parms"))
66 :     {
67 :     print TMP &Dumper($cgi);
68 :     close(TMP);
69 :     }
70 : efrank 1.1 }
71 :     exit;
72 :     }
73 :    
74 :     $ENV{"PATH"} = "$FIG_Config::bin:$FIG_Config::ext_bin:" . $ENV{"PATH"};
75 :    
76 :     my $html = [];
77 : golsen 1.23
78 : efrank 1.5
79 : overbeek 1.51 my($pattern,$seq_pat,$tool,$ids,$subsearch);
80 : redwards 1.80
81 :     my $user = $cgi->param('user') || [$raelib->cookie($cgi)]->[1]->{'user'} || "";
82 :     if (! $cgi->param('user')) { $cgi->param(-name=>'user', -value=> $user) }
83 : efrank 1.1
84 : overbeek 1.28 if ($cgi->param('Search for Genes Matching an Occurrence Profile or Common to a Set of Organisms'))
85 : efrank 1.1 {
86 : parrello 1.78 Trace("Gene search chosen.") if T(2);
87 : golsen 1.23 unshift @$html, "<TITLE>The SEED: Phylogenetic Signatures</TITLE>\n";
88 : efrank 1.1 $ENV{"REQUEST_METHOD"} = "GET";
89 :     $ENV{"QUERY_STRING"} = "user=$user";
90 :     my @out = `./sigs.cgi`;
91 : overbeek 1.49 print @out;
92 :     exit;
93 :     }
94 :     #-----------------------------------------------------------------------
95 :     # Statistics for a single organism
96 :     #-----------------------------------------------------------------------
97 :     elsif ($cgi->param('statistics'))
98 :     {
99 : parrello 1.78 Trace("Statistics chosen.") if T(2);
100 : overbeek 1.49 @orgs = $cgi->param('korgs');
101 :     @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
102 :     if (@orgs != 1)
103 :     {
104 : golsen 1.72 unshift @$html, "<TITLE>The SEED Statistics Page</TITLE>\n";
105 : overbeek 1.49 push(@$html,$cgi->h1('Please select a single organism to get statistcs'));
106 :     }
107 :     else
108 :     {
109 : parrello 1.75 $ENV{"REQUEST_METHOD"} = "GET";
110 :     $ENV{"QUERY_STRING"} = "user=$user&genome=$orgs[0]";
111 :     my @out = `./genome_statistics.cgi`;
112 :     print @out;
113 :     exit;
114 : overbeek 1.49 }
115 : efrank 1.1 }
116 : overbeek 1.17 #-----------------------------------------------------------------------
117 : overbeek 1.51 # Locate PEGs in Subsystems
118 :     #-----------------------------------------------------------------------
119 :     elsif ($cgi->param('Find PEGs') && ($subsearch = $cgi->param('subsearch')))
120 :     {
121 : parrello 1.78 Trace("PEG find chosen.") if T(2);
122 : overbeek 1.51 my $genome = $cgi->param('genome');
123 :     my (@pegs,$peg);
124 : overbeek 1.52
125 :     my @poss = $fig->by_alias($subsearch);
126 :     if (@poss > 0) { $subsearch = $poss[0] }
127 :    
128 : overbeek 1.51 if ($subsearch =~ /(fig\|\d+\.\d+\.peg\.\d+)/)
129 :     {
130 : parrello 1.75 # handle searching for homologs that occur in subsystems
131 :     $peg = $1;
132 :     @pegs = ($peg);
133 :     push(@pegs,map { $_->id2 } $fig->sims( $peg, 500, 1.0e-10, "fig"));
134 :     if ($genome)
135 :     {
136 :     my $genomeQ = quotemeta $genome;
137 :     @pegs = grep { $_ =~ /^fig\|$genomeQ/ } @pegs;
138 :     }
139 : overbeek 1.51 }
140 :     else
141 :     {
142 : parrello 1.75 # handle searching for PEGs with functional role in subsystems
143 :     @pegs = $fig->seqs_with_role($subsearch,"master",$genome);
144 : overbeek 1.51 }
145 :    
146 :     print $cgi->header;
147 :     if (@pegs == 0)
148 :     {
149 : parrello 1.75 print $cgi->h1("Sorry, could not even find PEGs to check");
150 : overbeek 1.51 }
151 :     else
152 :     {
153 : parrello 1.75 my(@pairs,$pair,@sub);
154 :     @pairs = map { $peg = $_;
155 :     @sub = $fig->peg_to_subsystems($peg);
156 :     map { [$peg,$_] } @sub } @pegs;
157 :     if (@pairs == 0)
158 :     {
159 :     print $cgi->h1("Sorry, could not map any PEGs to subsystems");
160 :     }
161 :     else
162 :     {
163 :     my($uni,$uni_func);
164 :     my $col_hdrs = ["PEG","Genome","Function","UniProt","UniProt Function","Subsystem"];
165 :     my $tab = [ map { $pair = $_; $uni = $fig->to_alias($pair->[0],"uni");
166 :     ($uni,$uni_func) = $uni ? (&HTML::uni_link($cgi,$uni),scalar $fig->function_of($uni)) : ("","");
167 :     [&HTML::fid_link($cgi,$pair->[0]),
168 :     $fig->org_of($pair->[0]),
169 :     scalar $fig->function_of($pair->[0]),
170 :     $uni,$uni_func,
171 :     &HTML::sub_link($cgi,$pair->[1])] } @pairs];
172 :     print &HTML::make_table($col_hdrs,$tab,"PEGs that Occur in Subsystems");
173 :     }
174 : overbeek 1.51 }
175 :     exit;
176 :     }
177 :     #-----------------------------------------------------------------------
178 : overbeek 1.31 # Align Sequences
179 :     #-----------------------------------------------------------------------
180 :     elsif ($cgi->param('Align Sequences'))
181 :     {
182 : parrello 1.78 Trace("Sequence alignment chosen.");
183 : overbeek 1.31 my $seqs = $cgi->param('seqids');
184 :     $seqs =~ s/^\s+//;
185 :     $seqs =~ s/\s+$//;
186 :     my @seq_ids = split(/[ \t,;]+/,$seqs);
187 :     if (@seq_ids < 2)
188 :     {
189 : parrello 1.75 print $cgi->header;
190 :     print $cgi->h1("Sorry, you need to specify at least two sequence IDs");
191 : overbeek 1.31 }
192 :     else
193 :     {
194 : parrello 1.75 $ENV{"REQUEST_METHOD"} = "GET";
195 :     $_ = join('&checked=',@seq_ids);
196 :     $ENV{"QUERY_STRING"} = "user=$user&align=1&checked=" . $_;
197 :     my @out = `./fid_checked.cgi`;
198 :     print join("",@out);
199 : overbeek 1.31 }
200 :     exit;
201 :     }
202 :     #-----------------------------------------------------------------------
203 : overbeek 1.17 # Search (text) || Find Genes in Org that Might Play the Role
204 :     #-----------------------------------------------------------------------
205 : golsen 1.59 elsif ( ( $pattern = $cgi->param('pattern') )
206 :     && ( $cgi->param('Search')
207 :     || $cgi->param('Search genome selected below')
208 : redwards 1.63 || $cgi->param('Search Selected Organisms')
209 : golsen 1.59 || $cgi->param('Find Genes in Org that Might Play the Role')
210 :     )
211 :     )
212 : efrank 1.1 {
213 : parrello 1.78 Trace("Pattern search chosen.") if T(2);
214 : overbeek 1.17 # Remove leading and trailing spaces from pattern -- GJO:
215 :     $pattern =~ s/^\s+//;
216 :     $pattern =~ s/\s+$//;
217 : efrank 1.1 if ($cgi->param('Find Genes in Org that Might Play the Role') &&
218 : parrello 1.75 (@orgs = $cgi->param('korgs')) && (@orgs == 1))
219 : efrank 1.1 {
220 : parrello 1.75 unshift @$html, "<TITLE>The SEED: Genes in that Might Play Specific Role</TITLE>\n";
221 :     @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
222 :     $ENV{"REQUEST_METHOD"} = "GET";
223 :     $ENV{"QUERY_STRING"} = "user=$user&request=find_in_org&role=$pattern&org=$orgs[0]";
224 :     my @out = `./pom.cgi`;
225 :     print join("",@out);
226 :     exit;
227 : efrank 1.1 }
228 :     else
229 :     {
230 : parrello 1.75 unshift @$html, "<TITLE>The SEED: Search Results</TITLE>\n";
231 :     &show_indexed_objects($fig, $cgi, $html, $pattern);
232 : efrank 1.1 }
233 :     }
234 : overbeek 1.17 #-----------------------------------------------------------------------
235 :     # Metabolic Overview
236 :     #-----------------------------------------------------------------------
237 : efrank 1.1 elsif (($map = $cgi->param('kmap')) && $cgi->param('Metabolic Overview'))
238 :     {
239 : parrello 1.78 Trace("Metabolic overview chosen.") if T(2);
240 : olson 1.38 if ($map =~ /\(([^)]*)\)$/)
241 :     {
242 : parrello 1.75 $map = $1;
243 : olson 1.38 }
244 :     else
245 :     {
246 : parrello 1.75 # ??? Gary ???
247 : olson 1.38 }
248 :    
249 :     #$map =~ s/^.*\((MAP\d+)\).*$/$1/;
250 : efrank 1.1 @orgs = $cgi->param('korgs');
251 :     @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
252 :     $ENV{"REQUEST_METHOD"} = "GET";
253 :     if (@orgs > 0)
254 :     {
255 : parrello 1.75 $ENV{"QUERY_STRING"} = "user=$user&map=$map&org=$orgs[0]";
256 : efrank 1.1 }
257 :     else
258 :     {
259 : parrello 1.75 $ENV{"QUERY_STRING"} = "user=$user&map=$map";
260 : efrank 1.1 }
261 :    
262 : golsen 1.23 unshift @$html, "<TITLE>The SEED: Metabolic Overview</TITLE>\n";
263 : olson 1.38 my @out = `./show_map.cgi`;
264 : efrank 1.1 &HTML::trim_output(\@out);
265 : golsen 1.23 push( @$html, "<br>\n", @out );
266 : efrank 1.1 }
267 : overbeek 1.17
268 :     #-----------------------------------------------------------------------
269 :     # Search for Matches (sequence or pattern)
270 :     #-----------------------------------------------------------------------
271 : efrank 1.1 elsif (($seq_pat = $cgi->param('seq_pat')) &&
272 :     ($tool = $cgi->param('Tool')) &&
273 :     $cgi->param('Search for Matches'))
274 :     {
275 : parrello 1.78 Trace("Match search chosen.") if T(2);
276 : overbeek 1.30 @orgs = $cgi->param('korgs');
277 :     if (@orgs > 0)
278 :     {
279 : parrello 1.75 @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
280 : overbeek 1.30 }
281 :     else
282 :     {
283 : parrello 1.75 @orgs = ("");
284 : overbeek 1.30 }
285 :    
286 : efrank 1.1 if ($tool =~ /blast/)
287 :     {
288 : parrello 1.75 unshift @$html, "<TITLE>The SEED: BLAST Search Results</TITLE>\n";
289 :     &run_blast($fig,$cgi,$html,$orgs[0],$tool,$seq_pat);
290 : efrank 1.1 }
291 :     elsif ($tool =~ /Protein scan_for_matches/)
292 :     {
293 : parrello 1.75 unshift @$html, "<TITLE>The SEED: Protein Pattern Match Results</TITLE>\n";
294 :     &run_prot_scan_for_matches($fig,$cgi,$html,$orgs[0],$seq_pat);
295 : efrank 1.1 }
296 :     elsif ($tool =~ /DNA scan_for_matches/)
297 :     {
298 : parrello 1.75 unshift @$html, "<TITLE>The SEED: Nucleotide Pattern Match Results</TITLE>\n";
299 :     &run_dna_scan_for_matches($fig,$cgi,$html,$orgs[0],$seq_pat);
300 : efrank 1.1 }
301 :     }
302 : overbeek 1.7 elsif (($made_by = $cgi->param('made_by')) && $cgi->param('Extract Assignments'))
303 :     {
304 : parrello 1.78 Trace("Assignment export chosen.") if T(2);
305 : overbeek 1.7 &export_assignments($fig,$cgi,$html,$made_by);
306 :     }
307 : overbeek 1.28 elsif ($cgi->param('Generate Assignments via Translation') &&
308 :     ($from_func = $cgi->param('from_func')) &&
309 :     ($to_func = $cgi->param('to_func')))
310 :     {
311 : parrello 1.78 Trace("Assignment translate chosen.") if T(2);
312 : overbeek 1.28 &translate_assignments($fig,$cgi,$html,$from_func,$to_func);
313 :     }
314 : overbeek 1.30 elsif ($cgi->param('Extract Matched Sequences') && ($ids = $cgi->param('ids')))
315 :     {
316 : parrello 1.78 Trace("Matched sequence extract chosen.") if T(2);
317 : overbeek 1.30 my @ids = split(/,/,$ids);
318 :     my($list_to,$i);
319 :     if ($list_to = $cgi->param('list_to'))
320 :     {
321 : parrello 1.75 for ($i=0; ($i < @ids) && ($ids[$i] ne $list_to); $i++) {}
322 :     if ($i < @ids)
323 :     {
324 :     $#ids = $i;
325 :     }
326 : overbeek 1.30 }
327 :    
328 :     my($id,$seq,$i,$func);
329 :     push(@$html,$cgi->pre);
330 :    
331 :     foreach $id (@ids)
332 :     {
333 : parrello 1.75 if ($seq = $fig->get_translation($id))
334 :     {
335 :     $func = $fig->function_of($id);
336 :     push(@$html,">$id $func\n");
337 :     for ($i=0; ($i < length($seq)); $i += 60)
338 :     {
339 :     if ($i > (length($seq) - 60))
340 :     {
341 :     push(@$html,substr($seq,$i) . "\n");
342 :     }
343 :     else
344 :     {
345 :     push(@$html,substr($seq,$i,60) . "\n");
346 :     }
347 :     }
348 :     }
349 : overbeek 1.30 }
350 :     push(@$html,$cgi->end_pre);
351 :     }
352 : overbeek 1.17
353 :     #-----------------------------------------------------------------------
354 :     # Initial search page
355 :     #-----------------------------------------------------------------------
356 : efrank 1.1 else
357 :     {
358 : parrello 1.78 Trace("SEED Entry page chosen.") if T(2);
359 : golsen 1.23 unshift @$html, "<TITLE>The SEED: Entry Page</TITLE>\n";
360 : efrank 1.1 &show_initial($fig,$cgi,$html);
361 :     }
362 :     &HTML::show_page($cgi,$html,1);
363 : overbeek 1.49 exit;
364 : efrank 1.1
365 : overbeek 1.17
366 :     #==============================================================================
367 :     # Initial page (alias search)
368 :     #==============================================================================
369 :    
370 : efrank 1.1 sub show_initial {
371 :     my($fig,$cgi,$html) = @_;
372 :     my($map,$name,$olrg,$gs);
373 :    
374 : golsen 1.47 my( $a, $b, $e, $v, $env ) = $fig->genome_counts;
375 :     push(@$html,$cgi->h2("Contains $a archaeal, $b bacterial, $e eukaryal, $v viral and $env environmental genomes"));
376 :     my( $a, $b, $e ) = $fig->genome_counts("complete");
377 :     push(@$html,$cgi->h2("Of these, $a archaeal, $b bacterial and $e eukaryal genomes are more-or-less complete"),$cgi->hr);
378 : efrank 1.1
379 :     push(@$html,
380 : parrello 1.75 $cgi->h2('Work on Subsystems'),
381 : overbeek 1.46
382 : parrello 1.75 # $cgi->start_form(-action => "ssa2.cgi"),
383 :     # "Enter user: ",
384 :     # $cgi->textfield(-name => "user", -size => 20),
385 :     # $cgi->submit('Work on Subsystems'),
386 :     # $cgi->end_form,
387 :    
388 :     # $cgi->h2('Work on Subsystems Using New, Experimental Code'),
389 :     "This is the <i>new</i> subsystems code, and is now officially released.",
390 :     $cgi->start_form(-action => "subsys.cgi"),
391 :     "Enter user: ",
392 :     $cgi->textfield(-name => "user", -size => 20),
393 :     $cgi->submit('Work on Subsystems'),
394 :     $cgi->end_form,
395 :     $cgi->hr,
396 :     );
397 : olson 1.42
398 :     push(@$html,
399 : parrello 1.75 $cgi->start_form(-action => "index.cgi"),
400 :     $cgi->h2('Searching for Genes or Functional Roles Using Text'),
401 :     "<table><tr>",
402 : overbeek 1.17 "<td>Search Pattern: </td><td>",
403 : parrello 1.75 $cgi->textfield(-name => "pattern", -size => 65),
404 :     "</td></tr><tr>",
405 :     "<td>User ID:</td><td>",
406 :     $cgi->textfield(-name => "user", -size => 20),
407 :     " [optional] ",
408 : overbeek 1.17 "&nbsp; &nbsp; Max Genes: ",
409 : parrello 1.75 $cgi->textfield(-name => "maxpeg", -size => 6, -value => 100),
410 : overbeek 1.17 "&nbsp; &nbsp; Max Roles: ",
411 : parrello 1.75 $cgi->textfield(-name => "maxrole", -size => 6, -value => 100),
412 :     $cgi->checkbox(-name => "substring_match", -label => 'Allow substring match'),
413 :     "</td></td></table>",
414 :     $cgi->submit('Search'),
415 :     $cgi->submit('Search genome selected below'),
416 :     $cgi->reset('Clear'),
417 :     $cgi->hr);
418 : olson 1.41
419 : golsen 1.47 my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Viruses', 'Environmental samples' );
420 :    
421 :     #
422 :     # Canonical names must match the keywords used in the DBMS. They are
423 :     # defined in compute_genome_counts.pl
424 :     #
425 :     my %canonical = (
426 :     'All' => undef,
427 :     'Archaea' => 'Archaea',
428 :     'Bacteria' => 'Bacteria',
429 :     'Eucarya' => 'Eukaryota',
430 :     'Viruses' => 'Virus',
431 :     'Environmental samples' => 'Environmental Sample'
432 :     );
433 :    
434 :     my $req_dom = $cgi->param( 'domain' ) || 'All';
435 :     my @domains = $cgi->radio_group( -name => 'domain',
436 :     -default => $req_dom,
437 :     -override => 1,
438 :     -values => [ @display ]
439 :     );
440 :    
441 :     my $n_domain = 0;
442 :     my %dom_num = map { ( $_, $n_domain++ ) } @display;
443 :     my $req_dom_num = $dom_num{ $req_dom } || 0;
444 :    
445 :     #
446 :     # Viruses and Environmental samples must have completeness = All (that is
447 :     # how they are in the database). Otherwise, default is Only "complete".
448 :     #
449 :     my $req_comp = ( $req_dom_num > $dom_num{ 'Eucarya' } ) ? 'All'
450 :     : $cgi->param( 'complete' ) || 'Only "complete"';
451 :     my @complete = $cgi->radio_group( -name => 'complete',
452 :     -default => $req_comp,
453 :     -override => 1,
454 :     -values => [ 'All', 'Only "complete"' ]
455 :     );
456 :     #
457 :     # Use $fig->genomes( complete, restricted, domain ) to get org list:
458 :     #
459 :     my $complete = ( $req_comp =~ /^all$/i ) ? undef : "complete";
460 : redwards 1.48
461 :     my @orgs = sort map { $org = $_; my $gs = $fig->genus_species($org); my $gc=scalar $fig->all_contigs($org); "$gs ($org) [$gc contigs]" }
462 : golsen 1.47 $fig->genomes( $complete, undef, $canonical{ $req_dom } );
463 :    
464 :     my $n_genomes = @orgs;
465 :    
466 : golsen 1.59 push( @$html, $cgi->h2('If You Need to Pick a Genome for Options Below'),
467 : golsen 1.47 "<TABLE>\n",
468 :     " <TR>\n",
469 :     " <TD>",
470 : parrello 1.75 $cgi->scrolling_list( -name => 'korgs',
471 : golsen 1.47 -values => [ @orgs ],
472 : redwards 1.63 -size => 10,
473 : golsen 1.60 ), $cgi->br,
474 : golsen 1.47 "$n_genomes genomes shown ",
475 : golsen 1.60 $cgi->submit( 'Update List' ), $cgi->reset, $cgi->br,
476 : parrello 1.75 "Show some ", $cgi->submit('statistics')," of the selected genome",
477 : overbeek 1.77 " </TD>");
478 :    
479 :     push(@$html,
480 : golsen 1.47 " <TD>",
481 :     join( "<br>", "<b>Domain(s) to show:</b>", @domains), "<br>\n",
482 :     join( "<br>", "<b>Completeness?</b>", @complete), "\n",
483 :     "</TD>",
484 :     " </TR>\n",
485 :     "</TABLE>\n",
486 : golsen 1.60 $cgi->hr
487 :     );
488 : overbeek 1.49
489 : golsen 1.47 push( @$html, $cgi->h2('Finding Candidates for a Functional Role'),
490 : parrello 1.75 "Make sure that you type the functional role you want to search for in the Search Pattern above",
491 :     $cgi->br,
492 :     $cgi->submit('Find Genes in Org that Might Play the Role'),
493 :     $cgi->hr);
494 : overbeek 1.17
495 : golsen 1.60 my @maps = sort map { $map = $_; $name = $fig->map_name($map); "$name ($map)" } $fig->all_maps;
496 :    
497 : golsen 1.47 push( @$html, $cgi->h2('Metabolic Overviews and Subsystem Maps (via KEGG & SEED) - Choose Map'),
498 : parrello 1.75 $cgi->submit('Metabolic Overview'),
499 :     $cgi->br,
500 :     $cgi->br,
501 :     $cgi->scrolling_list(-name => 'kmap',
502 : efrank 1.1 -values => [@maps],
503 : parrello 1.75 -size => 10
504 : efrank 1.1 ),
505 : parrello 1.75 $cgi->hr);
506 : overbeek 1.17
507 : golsen 1.47 push( @$html, $cgi->h2('Searching DNA or Protein Sequences (in a selected organism)'),
508 : parrello 1.75 "<TABLE>\n",
509 :     " <TR>\n",
510 :     " <TD>Sequence/Pattern: </TD>",
511 :     " <TD Colspan=3>", $cgi->textarea(-name => 'seq_pat', -rows => 10, -cols => 70), "</TD>\n",
512 :     " </TR>\n",
513 :     " <TR>\n",
514 :     " <TD>Search Program: </TD>",
515 :     " <TD>", $cgi->popup_menu(-name => 'Tool', -values => ['blastp', 'blastx', 'blastn', 'tblastn', 'blastp against complete genomes', 'Protein scan_for_matches', 'DNA scan_for_matches'], -default => 'blastp'), " </TD>",
516 :     " <TD> Program Options:</TD>",
517 :     " <TD>", $cgi->textfield( -name => "blast_options", -size => 27 ), "</TD>",
518 :     " </TR>\n",
519 :     "</TABLE>\n",
520 :     $cgi->submit('Search for Matches'),
521 :     $cgi->hr);
522 : overbeek 1.17
523 : olson 1.41 #
524 :     # Make assignment export tbl.
525 :     #
526 :    
527 :     my @atbl;
528 : golsen 1.64 push(@atbl, [ "Extract assignments made by ",
529 : parrello 1.75 $cgi->textfield(-name => "made_by", -size => 25) . " (do not prefix with <b>master:</b>)" ]);
530 : golsen 1.64 push(@atbl, [ "Save as user: ",
531 : parrello 1.75 $cgi->textfield(-name => "save_user", -size => 25) . " (do not prefix with <b>master:</b>)" ] );
532 : golsen 1.64 push(@atbl, [ "After date (MM/DD/YYYY) ",
533 : parrello 1.75 $cgi->textfield(-name => "after_date", -size => 15)]);
534 : olson 1.41
535 :     push(@$html,
536 : parrello 1.75 $cgi->h2('Exporting Assignments'),
537 :     &HTML::make_table(undef, \@atbl, '', border => 0),
538 :     $cgi->checkbox(-label => 'Tab-delimited Spreadsheet', -name => 'tabs', -value => 1),
539 :     $cgi->br,
540 :     $cgi->checkbox(-label => 'Save Assignments', -name => 'save_assignments', -value => 1),
541 :     $cgi->br,
542 :     $cgi->submit('Extract Assignments'),
543 :     $cgi->br, $cgi->br, $cgi->br,
544 :     "Alternatively, you can generate a set of assignments as translations of existing assignments. ",
545 :     "To do so, you need to make sure that you fill in the <b>Save as user</b> field just above. You ",
546 :     "should use something like <b>RossO</b> (leave out the <b>master:</b>). When you look at the assignments (and decide which ",
547 :     "to actually install), they will be made available under that name (but, when you access them, ",
548 :     "you will normally be using something like <b>master:RossO</b>)",
549 :     $cgi->br,$cgi->br,
550 :     "From: ",
551 :     $cgi->textarea(-name => 'from_func', -rows => 4, -cols => 100),
552 :     $cgi->br,$cgi->br,
553 :     "To:&nbsp;&nbsp;&nbsp;&nbsp; ",$cgi->textfield(-name => "to_func", -size => 100),
554 :     $cgi->br,
555 : overbeek 1.76 $cgi->a({class=>"help", target=>"help", href=>"Html/seedtips.html#replace_names"}, "Help with generate assignments via translation"),
556 : parrello 1.75 $cgi->submit('Generate Assignments via Translation'),
557 :     $cgi->hr,
558 :     $cgi->h2('Searching for Interesting Genes'),
559 :     $cgi->submit('Search for Genes Matching an Occurrence Profile or Common to a Set of Organisms'),
560 : overbeek 1.17 $cgi->end_form
561 : parrello 1.75 );
562 : overbeek 1.14
563 :     push(@$html,
564 : parrello 1.75 $cgi->hr,
565 :     $cgi->h2('Process Saved Assignments Sets'),
566 :     $cgi->start_form(-action => "assignments.cgi"),
567 :     "Here you should include the <b>master:</b>. Thus use something like <b>master:RossO</b>",$cgi->br,
568 :     $cgi->br,
569 :     "Enter user: ",
570 :     $cgi->textfield(-name => "user", -size => 20),
571 :     $cgi->submit('Process Assignment Sets'),
572 : overbeek 1.17 $cgi->end_form
573 : parrello 1.75 );
574 : efrank 1.1
575 : overbeek 1.19 push(@$html,
576 : parrello 1.75 $cgi->hr,
577 :     $cgi->h2('Align Sequences'),
578 :     $cgi->start_form(-action => "index.cgi"),
579 :     "Enter user: ",
580 :     $cgi->textfield(-name => "user", -size => 20), $cgi->br,
581 :     $cgi->submit('Align Sequences'),": ",
582 :     $cgi->textfield(-name => "seqids", -size => 100),
583 : overbeek 1.31 $cgi->end_form
584 : parrello 1.75 );
585 : overbeek 1.51
586 :     push(@$html,
587 : parrello 1.75 $cgi->hr,
588 :     $cgi->h2('Locate PEGs in Subsystems'),
589 :     "If you wish to locate PEGs in subsystems, you have two approaches supported. You can
590 : overbeek 1.56 give a FIG id, and you will get a list of all homologs in the designated genome that occur in subsystems.
591 :     Alternatively, you can specify a functional role, and all PEGs in the genome that match that role will be shown.",
592 : parrello 1.75 $cgi->start_form(-action => "index.cgi"),
593 :     "Enter user: ",
594 :     $cgi->textfield(-name => "user", -size => 20), $cgi->br,
595 :     $cgi->br,"Genome: ",$cgi->textfield(-name => "genome", -size => 15),$cgi->br,
596 :     "Search: ",$cgi->textfield(-name => "subsearch", -size => 100),$cgi->br,
597 :     $cgi->submit('Find PEGs'),": ",
598 : overbeek 1.51 $cgi->end_form
599 : parrello 1.75 );
600 : efrank 1.1 }
601 :    
602 : overbeek 1.17
603 :     #==============================================================================
604 :     # Indexed objects (text search)
605 :     #==============================================================================
606 :    
607 : efrank 1.1 sub show_indexed_objects {
608 : golsen 1.22 my($fig, $cgi, $html, $pattern) = @_;
609 :     my($msg, $i);
610 : efrank 1.1
611 :     if ($pattern =~ /^\s*(fig\|\d+\.\d+\.peg\.\d+)\s*$/)
612 :     {
613 : parrello 1.75 my $peg = $1;
614 :     my $user = $cgi->param('user');
615 :     $user = $user ? $user : "";
616 :     $ENV{'REQUEST_METHOD'} = "GET";
617 :     $ENV{"QUERY_STRING"} = "prot=$peg\&user=$user";
618 :     $ENV{"REQUEST_URI"} =~ s/index.cgi/protein.cgi/;
619 : parrello 1.78 # TICK is like the back-tick ` operator, but works in Windows.
620 :     my @prot_out = TICK("./protein.cgi");
621 : parrello 1.75 print @prot_out;
622 :     exit;
623 : efrank 1.1 }
624 : overbeek 1.71 $pattern =~ s/([a-zA-Z0-9])\|([a-zA-Z0-9])/$1\\\|$2/ig;
625 : overbeek 1.17 push( @$html, $cgi->br );
626 : olson 1.70 my( $peg_index_data, $role_index_data ) = $fig->search_index($pattern, $cgi->param("substring_match") eq "on");
627 : overbeek 1.17 my $maxpeg = defined( $cgi->param("maxpeg") ) ? $cgi->param("maxpeg") : 100;
628 :     my $maxrole = defined( $cgi->param("maxrole") ) ? $cgi->param("maxrole") : 100;
629 :    
630 : redwards 1.53 # RAE added lines to allow searching within a single organism
631 : golsen 1.59 # if ($cgi->param('korgs'))
632 :     # {
633 :     # $cgi->param('korgs') =~ /\((\d+\.*\d*)\)/;
634 :     # $org=$1; # this should be undef if korgs is not defined
635 :    
636 :     # push (@$html, $cgi->br, "Matches found in ",$cgi->param('korgs'), $cgi->p);
637 :     # my @clean_data; my @clean_index;
638 :     # while (@$peg_index_data)
639 :     # {
640 :     # my ($data, $index)=(shift @$peg_index_data, shift @$role_index_data);
641 :     # next unless (${$data}[0] =~ /^fig\|$org\.peg/);
642 :     # push @clean_data, $data;
643 :     # push @clean_index, $index;
644 :     # }
645 :    
646 :     # @$peg_index_data=@clean_data;
647 :     # @$role_index_data=@clean_index;
648 :     # }
649 :     ## End of added lines
650 : redwards 1.53
651 : redwards 1.63 # RAE version with separate submit buttons and more than one org in korg
652 :     # this is used by organisms.cgi for group specific searches
653 :     if ( $cgi->param('korgs') && $cgi->param('Search Selected Organisms')
654 :     )
655 :     {
656 :     my @temp;
657 :     foreach my $org ($cgi->param('korgs'))
658 :     {
659 :     push @temp, grep { $_->[0] =~ /^fig\|$org/ } @$peg_index_data;
660 :     }
661 :     @$peg_index_data = @temp;
662 :     }
663 :    
664 : golsen 1.59 # GJO version with separate submit buttons
665 : redwards 1.53
666 : golsen 1.59 if ( $cgi->param('korgs') && $cgi->param('korgs') =~ /\((\d+\.*\d*)\)/
667 :     && $cgi->param('Search genome selected below')
668 :     )
669 :     {
670 : parrello 1.75 my $org = $1;
671 :     push @$html, $cgi->br, "Matches found in ",$cgi->param('korgs'), $cgi->p;
672 : mkubal 1.79 @$peg_index_data = grep { $_->[0] =~ /^fig\|$org\.*/ } @$peg_index_data;
673 : redwards 1.53 }
674 :    
675 : golsen 1.59 if ( ( $maxpeg > 0 ) && @$peg_index_data )
676 : overbeek 1.17 {
677 : parrello 1.75 # RAE: Added javascript buttons see below. Only two things are needed.
678 :     # The form must have a name parameter, and the one line of code for the
679 :     # buttons. Everything else is automatic
680 :    
681 :     push( @$html, $cgi->start_form( -method => 'post',
682 :     -target => "window$$",
683 :     -action => 'fid_checked.cgi',
684 :     -name => 'found_pegs'
685 :     ),
686 :     $cgi->hidden(-name => 'user', -value => $user),
687 :     "For Selected (checked) sequences: ",
688 :     $cgi->submit('get sequences'),
689 :     $cgi->submit('view annotations'),
690 :     $cgi->submit('assign/annotate'),
691 :     $cgi->param('SPROUT') ? () : $cgi->submit('view similarities'),
692 :     $cgi->br, $cgi->br
693 :     );
694 : efrank 1.1
695 : redwards 1.63 # RAE Add the check all/uncheck all boxes.
696 :     push (@$html, $cgi->br, &HTML::java_buttons("found_pegs", "checked"), $cgi->br);
697 : parrello 1.75
698 :     my $n = @$peg_index_data;
699 :     if ($n > $maxpeg)
700 :     {
701 :     $msg = "Showing first $maxpeg out of $n protein genes";
702 :     $#{$peg_index_data} = $maxpeg-1;
703 :     }
704 :     else
705 :     {
706 : mkubal 1.79 $msg = "Showing $n FEATURES";
707 : parrello 1.75 }
708 :    
709 : mkubal 1.79 my $col_hdrs = ["Sel","FEATURE","Organism","Aliases","Function","Who"];
710 : parrello 1.75 my $tab = [ map { format_peg_entry( $fig, $cgi, $_ ) } @$peg_index_data ];
711 :     push( @$html, &HTML::make_table($col_hdrs,$tab,$msg),
712 :     $cgi->br,
713 :     "For SELECTed (checked) sequences: ",
714 :     $cgi->submit('get sequences'),
715 :     $cgi->submit('view annotations'),
716 :     $cgi->submit('assign/annotate'),
717 :     $cgi->param('SPROUT') ? () : $cgi->submit('view similarities'),
718 :     $cgi->br,
719 :     $cgi->end_form
720 :     );
721 : efrank 1.1 }
722 : golsen 1.59 elsif ( $maxpeg > 0 )
723 :     {
724 : parrello 1.75 push @$html, $cgi->h3('No matching protein genes');
725 : golsen 1.59 }
726 : overbeek 1.17
727 : golsen 1.59 if ( ( $maxrole > 0 ) && @$role_index_data )
728 : efrank 1.1 {
729 : parrello 1.75 my $n = @$role_index_data;
730 :     if ($n > $maxrole)
731 :     {
732 :     $msg = "Showing first $maxrole out of $n Roles";
733 :     $#{$role_index_data} = $maxrole - 1;
734 :     }
735 :     else
736 :     {
737 :     $msg = "Showing $n Roles";
738 :     }
739 :    
740 :     if ( $maxpeg > 0 ) { push( @$html, $cgi->hr ) }
741 :     my $col_hdrs = ["Role"];
742 :     my $tab = [ map { &format_role_entry($fig,$cgi,$_) } @$role_index_data ];
743 :     push( @$html, &HTML::make_table($col_hdrs,$tab,$msg) );
744 : efrank 1.1 }
745 : golsen 1.59 elsif ( $maxrole > 0 )
746 :     {
747 : parrello 1.75 push @$html, $cgi->h3('No matching roles');
748 : golsen 1.59 }
749 : efrank 1.1 }
750 :    
751 : golsen 1.59
752 : efrank 1.1 sub format_peg_entry {
753 : golsen 1.67 my( $fig, $cgi, $entry ) = @_;
754 : efrank 1.1 my($i,$function,$who);
755 :    
756 :     my($peg,$gs,$aliases,@funcs) = @$entry;
757 : overbeek 1.17
758 : golsen 1.21 $gs =~ s/\s+\d+$//; # Org name comes with taxon_id appended (why?) -- GJO
759 : efrank 1.1
760 :     @funcs = map { $_ =~ s/^function:\s*//; $_ } @funcs;
761 :    
762 :     if ($aliases)
763 :     {
764 : parrello 1.75 $aliases =~ s/^aliases://;
765 : efrank 1.1 }
766 :     else
767 :     {
768 : parrello 1.75 $aliases = "";
769 : efrank 1.1 }
770 :    
771 : golsen 1.21 my $user = $cgi->param('user');
772 :     $user = $user ? $user : "";
773 :    
774 : efrank 1.1 if ($user)
775 :     {
776 : parrello 1.75 for ($i=0; ($i < @funcs) && ($funcs[$i] !~ /\#$user/); $i++) {}
777 :     if ($i < @funcs)
778 :     {
779 :     ($function,$who) = split(/\#/,$funcs[$i]);
780 :     }
781 : efrank 1.1 }
782 : golsen 1.21
783 : efrank 1.1 if (! $function)
784 :     {
785 : parrello 1.75 for ($i=0; ($i < @funcs) && ($funcs[$i] !~ /\#master/); $i++) {}
786 :     if ($i < @funcs)
787 :     {
788 :     ($function,$who) = split(/\#/,$funcs[$i]);
789 :     }
790 : efrank 1.1 }
791 :    
792 :     if ((! $function) && (@funcs > 0))
793 :     {
794 : parrello 1.75 ($function,$who) = split(/\#/,$funcs[0]);
795 : efrank 1.1 }
796 : golsen 1.67 my $box = "<input type=checkbox name=checked value=\"$peg\">";
797 : overbeek 1.17 return [ $box, &HTML::fid_link($cgi,$peg), $gs, $aliases, $function, $who ];
798 : efrank 1.1 }
799 :    
800 :     sub format_role_entry {
801 :     my($fig,$cgi,$entry) = @_;
802 :    
803 :     return [&HTML::role_link($cgi,$entry)];
804 :     }
805 :    
806 :     sub run_prot_scan_for_matches {
807 :     my($fig,$cgi,$html,$org,$pat) = @_;
808 :     my($string,$peg,$beg,$end,$user,$col_hdrs,$tab,$i);
809 :    
810 :     my $tmp_pat = "$FIG_Config::temp/tmp$$.pat";
811 :     open(PAT,">$tmp_pat")
812 : parrello 1.75 || die "could not open $tmp_pat";
813 : efrank 1.1 $pat =~ s/[\s\012\015]+/ /g;
814 :     print PAT "$pat\n";
815 :     close(PAT);
816 :     my @out = `$FIG_Config::ext_bin/scan_for_matches -p $tmp_pat < $FIG_Config::organisms/$org/Features/peg/fasta`;
817 :     if (@out < 1)
818 :     {
819 : parrello 1.75 push(@$html,$cgi->h1("Sorry, no hits"));
820 : efrank 1.1 }
821 :     else
822 :     {
823 : parrello 1.75 if (@out > 2000)
824 :     {
825 :     push(@$html,$cgi->h1("truncating to the first 1000 hits"));
826 :     $#out = 1999;
827 :     }
828 :    
829 :     push(@$html,$cgi->pre);
830 :     $user = $cgi->param('user');
831 :     $col_hdrs = ["peg","begin","end","string","function of peg"];
832 :     for ($i=0; ($i < @out); $i += 2)
833 :     {
834 :     if ($out[$i] =~ /^>([^:]+):\[(\d+),(\d+)\]/)
835 :     {
836 :     $peg = $1;
837 :     $beg = $2;
838 :     $end = $3;
839 :     $string = $out[$i+1];
840 :     chomp $string;
841 :     push( @$tab, [ &HTML::fid_link($cgi,$peg,1),
842 :     $beg,
843 :     $end,
844 :     $string,
845 :     scalar $fig->function_of( $peg, $user )
846 :     ]
847 :     );
848 :     }
849 :     }
850 :     push(@$html,&HTML::make_table($col_hdrs,$tab,"Matches"));
851 :     push(@$html,$cgi->end_pre);
852 : efrank 1.1 }
853 :     unlink($tmp_pat);
854 :     }
855 :    
856 : overbeek 1.17 #==============================================================================
857 :     # Scan for matches
858 :     #==============================================================================
859 :    
860 : efrank 1.1 sub run_dna_scan_for_matches {
861 :     my($fig,$cgi,$html,$org,$pat) = @_;
862 :     my($string,$contig,$beg,$end,$col_hdrs,$tab,$i);
863 :    
864 :     my $tmp_pat = "$FIG_Config::temp/tmp$$.pat";
865 :     open(PAT,">$tmp_pat")
866 : parrello 1.75 || die "could not open $tmp_pat";
867 : efrank 1.1 $pat =~ s/[\s\012\015]+/ /g;
868 :     print PAT "$pat\n";
869 :     close(PAT);
870 :     my @out = `cat $FIG_Config::organisms/$org/contigs | $FIG_Config::ext_bin/scan_for_matches -c $tmp_pat`;
871 :     if (@out < 1)
872 :     {
873 : parrello 1.75 push(@$html,$cgi->h1("Sorry, no hits"));
874 : efrank 1.1 }
875 :     else
876 :     {
877 : parrello 1.75 if (@out > 2000)
878 :     {
879 :     push(@$html,$cgi->h1("truncating to the first 1000 hits"));
880 :     $#out = 1999;
881 :     }
882 :    
883 :     push(@$html,$cgi->pre);
884 :     $col_hdrs = ["contig","begin","end","string"];
885 :     for ($i=0; ($i < @out); $i += 2)
886 :     {
887 :     if ($out[$i] =~ /^>([^:]+):\[(\d+),(\d+)\]/)
888 :     {
889 :     $contig = $1;
890 :     $beg = $2;
891 :     $end = $3;
892 :     $string = $out[$i+1];
893 :     chomp $string;
894 :     push(@$tab,[$contig,$beg,$end,$string]);
895 :     }
896 :     }
897 :     push(@$html,&HTML::make_table($col_hdrs,$tab,"Matches"));
898 :     push(@$html,$cgi->end_pre);
899 : efrank 1.1 }
900 :     unlink($tmp_pat);
901 :     }
902 :    
903 : overbeek 1.17 #==============================================================================
904 :     # BLAST search
905 :     #==============================================================================
906 :    
907 : efrank 1.1 sub run_blast {
908 : golsen 1.45 my( $fig, $cgi, $html, $org, $tool, $seq ) = @_;
909 :     my( $query, @out );
910 : efrank 1.1
911 : golsen 1.45 my $tmp_seq = "$FIG_Config::temp/run_blast_tmp$$.seq";
912 : efrank 1.1
913 : overbeek 1.17 #--------------------------------------------------------------------------
914 :     # Is the request for an id? Get the sequence
915 :     #--------------------------------------------------------------------------
916 : efrank 1.1 if ($seq =~ /^\s*([a-zA-Z]{2,4}\|\S+)/)
917 :     {
918 : parrello 1.75 # Replaced $id with $query so that output inherits label -- GJO
919 :     $query = $1;
920 :     $seq = "";
921 :     if (($tool eq "blastp") || ($tool eq "tblastn"))
922 :     {
923 :     $seq = $fig->get_translation($query);
924 :     }
925 :     elsif ($query =~ /^fig/)
926 :     {
927 :     my @locs;
928 :     if ((@locs = $fig->feature_location($query)) && (@locs > 0))
929 :     {
930 :     $seq = $fig->dna_seq($fig->genome_of($query),@locs);
931 :     }
932 :     }
933 :     if (! $seq)
934 :     {
935 :     push(@$html,$cgi->h1("Sorry, could not get sequence for $query"));
936 :     return;
937 :     }
938 : efrank 1.1 }
939 : golsen 1.45
940 : overbeek 1.17 #--------------------------------------------------------------------------
941 :     # Is it a fasta format? Get the query name
942 :     #--------------------------------------------------------------------------
943 : golsen 1.45
944 :     elsif ( $seq =~ s/^>\s*(\S+[^\n\012\015]*)// ) # more flexible match -- GJO
945 : efrank 1.1 {
946 : parrello 1.75 $query = $1;
947 : efrank 1.1 }
948 : golsen 1.45
949 : overbeek 1.17 #--------------------------------------------------------------------------
950 :     # Take it as plain text
951 :     #--------------------------------------------------------------------------
952 : golsen 1.45
953 : efrank 1.1 else
954 :     {
955 : parrello 1.75 $query = "query";
956 : efrank 1.1 }
957 : golsen 1.45
958 :     #
959 :     # The rest is taken as the sequence
960 :     #
961 :    
962 : golsen 1.23 $seq =~ s/\s+//g;
963 : golsen 1.45 open( SEQ, ">$tmp_seq" ) || die "run_blast could not open $tmp_seq";
964 : efrank 1.1 print SEQ ">$query\n$seq\n";
965 : golsen 1.45 close( SEQ );
966 : efrank 1.1
967 :     if (! $ENV{"BLASTMAT"}) { $ENV{"BLASTMAT"} = "$FIG_Config::blastmat" }
968 : golsen 1.45 my $blast_opt = $cgi->param( 'blast_options' );
969 :     my $blastall = "$FIG_Config::ext_bin/blastall";
970 : efrank 1.1
971 : golsen 1.45 if ( $tool eq "blastp" )
972 : efrank 1.1 {
973 : parrello 1.75 my $db = "$FIG_Config::organisms/$org/Features/peg/fasta";
974 :     &verify_db( $db, "p" );
975 :     @out = map { &HTML::set_prot_links($cgi,$_) } `$blastall -i $tmp_seq -d $db -p blastp $blast_opt`;
976 : efrank 1.1 }
977 : golsen 1.45
978 :     elsif ( $tool eq "blastx" )
979 : efrank 1.1 {
980 : parrello 1.75 my $db = "$FIG_Config::organisms/$org/Features/peg/fasta";
981 :     &verify_db( $db, "p" );
982 :     @out = map { &HTML::set_prot_links($cgi,$_) } `$blastall -i $tmp_seq -d $db -p blastx $blast_opt`;
983 : efrank 1.1 }
984 : golsen 1.45
985 :     elsif ( $tool eq "blastn" )
986 : efrank 1.1 {
987 : parrello 1.75 my $db = "$FIG_Config::organisms/$org/contigs";
988 :     &verify_db( $db, "n" ); ### fix to get all contigs
989 :     @out = `$blastall -i $tmp_seq -d $db -p blastn -r 1 -q -1 $blast_opt`;
990 :     push @$html, blast_graphics( $fig, $cgi, $org, \@out, $tool );
991 : efrank 1.1 }
992 : golsen 1.45
993 :     elsif ( $tool eq "tblastn" )
994 : efrank 1.1 {
995 : parrello 1.75 my $db = "$FIG_Config::organisms/$org/contigs";
996 :     &verify_db( $db, "n" ); ### fix to get all contigs
997 :     @out = `$blastall -i $tmp_seq -d $db -p tblastn $blast_opt`;
998 :     push @$html, blast_graphics( $fig, $cgi, $org, \@out, $tool );
999 : efrank 1.1 }
1000 : golsen 1.45
1001 :     elsif ( $tool eq 'blastp against complete genomes' ) ### this tool gets nonstandard treatment: RAO
1002 : overbeek 1.30 {
1003 : parrello 1.75 &blast_complete($fig,$cgi,$html,$tmp_seq,$blastall);
1004 :     unlink($tmp_seq);
1005 :     return;
1006 : overbeek 1.30 }
1007 : golsen 1.45
1008 : overbeek 1.17 if (@out < 1) # This is really a bigger problem than no hits (GJO)
1009 : efrank 1.1 {
1010 : parrello 1.75 push(@$html,$cgi->h1("Sorry, no hits"));
1011 : efrank 1.1 }
1012 :     else
1013 :     {
1014 : parrello 1.75 push(@$html,$cgi->pre);
1015 :     push(@$html,@out);
1016 :     push(@$html,$cgi->end_pre);
1017 : efrank 1.1 }
1018 : golsen 1.45 unlink( $tmp_seq );
1019 : efrank 1.1 }
1020 :    
1021 : golsen 1.45
1022 : overbeek 1.30 sub blast_complete {
1023 :     my($fig,$cgi,$html,$seq_file,$blastall) = @_;
1024 :     my($genome,@sims);
1025 :    
1026 :     @sims = ();
1027 :     foreach $genome ($fig->genomes("complete"))
1028 :     {
1029 : parrello 1.75 my $db = "$FIG_Config::organisms/$genome/Features/peg/fasta";
1030 :     next if (! -s $db);
1031 : overbeek 1.30
1032 : parrello 1.75 &verify_db($db,"p");
1033 :     my $sim;
1034 :     push(@sims,map { chop;
1035 :     $sim = [split(/\t/,$_)];
1036 :     $sim->[10] = ($sim->[10] =~ /^e-/) ? "1.0" . $sim->[10] : $sim->[10];
1037 :     $sim }
1038 :     `$blastall -i $seq_file -d $db -m 8 -FF -e 1.0e-5 -p blastp`);
1039 : overbeek 1.30 }
1040 :     @sims = sort { $a->[10] <=> $b->[10] } @sims;
1041 :     &format_sims($fig,$cgi,$html,\@sims);
1042 :     }
1043 :    
1044 : golsen 1.65
1045 :     #------------------------------------------------------------------------------
1046 :     # Graphically display searches against contigs
1047 :     #
1048 :     # use FIGjs qw( toolTipScript );
1049 :     # use GenoGraphics qw( render );
1050 :     # use IPC::Open2 qw( open2 );
1051 :     #------------------------------------------------------------------------------
1052 :     # Fields produced by rationalize_blast:
1053 :     #
1054 :     # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
1055 :     # HSP score exp p_n p_val n_match n_ident n_sim n_gap dir q1 q2 q_sq s1 s2 s_sq
1056 :     #------------------------------------------------------------------------------
1057 :    
1058 :     sub blast_graphics {
1059 :     my ( $fig_or_sprout, $cgi, $genome, $out, $tool ) = @_;
1060 :    
1061 :     my $e_min = 0.1;
1062 :     my $gg = [];
1063 :     my @html = ();;
1064 :    
1065 :     # Run rationalize_blast:
1066 :    
1067 :     my( $pid, $rd, $wr );
1068 :     if ( $pid = open2( $rd, $wr, "rationalize_blast" ) )
1069 :     {
1070 : parrello 1.75 my $outlen = 0;
1071 :     foreach ( @$out ) { $outlen += length( $_ ) }
1072 : golsen 1.65
1073 : parrello 1.75 $wr->write( join( "", @$out ), $outlen );
1074 :     close( $wr );
1075 : golsen 1.65
1076 :     my ( $qid, $qdef, $qlen, $contig, $sdef, $slen );
1077 :     my @rational = <$rd>;
1078 :     foreach ( map { chomp; $_ } @rational )
1079 :     {
1080 :     if ( /^Query=/ ) { ( undef, $qid, undef, $qlen ) = split /\t/ }
1081 :     elsif ( /^>/ ) { ( undef, $contig, undef, $slen ) = split /\t/ }
1082 :     elsif ( /^HSP/ && $qid && $qlen && $contig && $slen )
1083 :     {
1084 :     my @hsp = split /\t/;
1085 :     next if $hsp[2] > $e_min;
1086 :     my ( $e_val, $q1, $q2, $s1, $s2 ) = @hsp[ 2, 10, 11, 13, 14 ];
1087 :     my ( $genes, $min, $max ) = hsp_context( $fig_or_sprout, $cgi, $genome,
1088 :     $e_val, 100 * $hsp[6] / $hsp[5],
1089 :     $qid, $q1, $q2, $qlen,
1090 :     $contig, $s1, $s2, $slen
1091 :     );
1092 : overbeek 1.74 if ($min && $max)
1093 :     {
1094 :     push @$gg, [ substr( $contig, 0, 18 ), $min, $max, $genes ];
1095 :     }
1096 : golsen 1.65 }
1097 :     }
1098 :     close( $rd );
1099 :    
1100 :     # $gene = [ $beg, $end, $shape, $color, $text, $url, $pop-up, $alt_action, $pop-up_title ];
1101 :     # $genes = [ $gene, $gene, ... ];
1102 :     # $map = [ $label, $min_coord, $max_coord, $genes ];
1103 :     # $gg = [ $map, $map, ... ];
1104 :     # render( $gg, $width, $obj_half_heigth, $save, $img_index_number )
1105 :    
1106 : parrello 1.75 waitpid $pid, 0;
1107 : golsen 1.65 }
1108 :    
1109 :     return @html;
1110 :     }
1111 :    
1112 :    
1113 :     sub hsp_context {
1114 :     my( $fig_or_sprout, $cgi, $genome, $e_val, $pct_id,
1115 :     $qid, $q1, $q2, $qlen,
1116 : parrello 1.75 $contig, $s1, $s2, $slen ) = @_;
1117 : golsen 1.65 my $half_sz = 5000;
1118 :    
1119 :     my( $from, $to, $features, $fid, $beg, $end );
1120 :     my( $link, $lbl, $isprot, $function, $uniprot, $info, $prot_query );
1121 :    
1122 :     my $user = $cgi->param( 'user' ) || "";
1123 :     my $sprout = $cgi->param( 'SPROUT' ) ? '&SPROUT=1' : '';
1124 :    
1125 :     my @genes = ();
1126 :    
1127 :     # Based on the match position of the query, select the context region:
1128 :    
1129 :     ( $from, $to ) = ( $s1 <= $s2 ) ? ( $s1 - $half_sz, $s2 + $half_sz )
1130 :     : ( $s2 - $half_sz, $s1 + $half_sz );
1131 :     $from = 1 if ( $from < 1 );
1132 :     $to = $slen if ( $to > $slen );
1133 :    
1134 :     # Get the genes in the region, and adjust the ends to include whole genes:
1135 :    
1136 :     ( $features, $from, $to ) = genes_in_region( $fig_or_sprout, $cgi, $genome, $contig, $from, $to );
1137 :    
1138 :    
1139 :     # Add the other features:
1140 :    
1141 :     foreach $fid ( @$features )
1142 :     {
1143 : parrello 1.75 my $contig1;
1144 :     ( $contig1, $beg, $end ) = boundaries_of( $fig_or_sprout, feature_locationS( $fig_or_sprout, $fid ) );
1145 :     next if $contig1 ne $contig;
1146 :    
1147 :     $link = "";
1148 :     if ( ( $lbl ) = $fid =~ /peg\.(\d+)$/ ) {
1149 :     ( $link = $cgi->url() ) =~ s/index\.cgi/protein.cgi/;
1150 :     $link .= "?prot=$fid&user=$user$sprout";
1151 :     $isprot = 1;
1152 :     } elsif ( ( $lbl ) = $fid =~ /\.([a-z]+)\.\d+$/ ) {
1153 :     $lbl = uc $lbl;
1154 :     $isprot = 0;
1155 :     } else {
1156 :     $lbl = "";
1157 :     $isprot = 0;
1158 :     }
1159 :    
1160 :     $function = function_ofS( $fig_or_sprout, $fid );
1161 :    
1162 :     $uniprot = join ", ", grep { /^uni\|/ } feature_aliasesL( $fig_or_sprout, $fid);
1163 :    
1164 :     $info = join( '<br />', "<b>Feature:</b> $fid",
1165 :     "<b>Contig:</b> $contig",
1166 :     "<b>Begin:</b> $beg",
1167 :     "<b>End:</b> $end",
1168 :     $function ? "<b>Function:</b> $function" : '',
1169 :     $uniprot ? "<b>Uniprot ID:</b> $uniprot" : ''
1170 :     );
1171 :    
1172 :     push @genes, [ feature_graphic( $beg, $end, $isprot ),
1173 :     $lbl, $link, $info,
1174 :     $isprot ? () : ( undef, "Feature information" )
1175 :     ];
1176 : golsen 1.65 }
1177 :    
1178 :     # Draw the query. The subject coordinates are always DNA. If the query
1179 :     # is protein, it is about 3 times shorter than the matching contig DNA.
1180 :     # Splitting the difference, if 1.7 times the query length is still less
1181 :     # than the subject length, we will call it a protein query (and reading
1182 :     # frame in the contig coordinates has meaning). If it is nucleotides,
1183 :     # there is no defined frame.
1184 :    
1185 :     $info = join( '<br />', $qid ne 'query ' ? "<b>Query:</b> $qid" : (),
1186 :     "<b>Length:</b> $qlen",
1187 :     "<b>E-value:</b> $e_val",
1188 :     "<b>% identity:</b> " . sprintf( "%.1f", $pct_id ),
1189 :     "<b>Region of similarity:</b> $q1 &#150; $q2"
1190 :     );
1191 :     $prot_query = ( 1.7 * abs( $q2 - $q1 ) < abs( $s2 - $s1 ) ) ? 1 : 0;
1192 :    
1193 :     push @genes, [ feature_graphic( $s1, $s2, $prot_query ),
1194 :     'Q', undef, $info, undef, 'Query and match information'
1195 :     ];
1196 :    
1197 :     return \@genes, $from, $to;
1198 :     }
1199 :    
1200 :    
1201 :     sub feature_graphic {
1202 :     my ( $beg, $end, $isprot ) = @_;
1203 :     my ( $min, $max, $symb, $color );
1204 :    
1205 :     ( $min, $max, $symb ) = ( $beg <= $end ) ? ( $beg, $end, "rightArrow" )
1206 :     : ( $end, $beg, "leftArrow" );
1207 :    
1208 :     # Color proteins by translation frame
1209 :    
1210 :     $color = $isprot ? qw( blue red green )[ $beg % 3 ] : 'grey';
1211 :    
1212 :     ( $min, $max, $symb, $color );
1213 :     }
1214 :    
1215 :    
1216 :     sub genes_in_region {
1217 :     my( $fig_or_sprout, $cgi, $genome, $contig, $min, $max ) = @_;
1218 :    
1219 :     if ( $cgi->param( 'SPROUT' ) )
1220 :     {
1221 : parrello 1.75 my( $x, $feature_id );
1222 :     my( $feat, $min, $max ) = $fig_or_sprout->genes_in_region( $genome, $contig, $min, $max );
1223 :     my @tmp = sort { ($a->[1] cmp $b->[1]) or
1224 :     (($a->[2]+$a->[3]) <=> ($b->[2]+$b->[3]))
1225 :     }
1226 :     map { $feature_id = $_;
1227 :     $x = feature_locationS( $fig_or_sprout, $feature_id );
1228 :     $x ? [ $feature_id, boundaries_of( $fig_or_sprout, $x )] : ()
1229 :     }
1230 :     @$feat;
1231 :     return ( [map { $_->[0] } @tmp ], $min, $max );
1232 : golsen 1.65 }
1233 :     else
1234 :     {
1235 : parrello 1.75 return $fig_or_sprout->genes_in_region( $genome, $contig, $min, $max );
1236 : golsen 1.65 }
1237 :     }
1238 :    
1239 :    
1240 :     sub feature_locationS {
1241 :     my ( $fig_or_sprout, $peg ) = @_;
1242 :     scalar $fig_or_sprout->feature_location( $peg );
1243 :     }
1244 :    
1245 :    
1246 :     sub boundaries_of {
1247 :     my( $fig_or_sprout, $loc ) = @_;
1248 :     $fig_or_sprout->boundaries_of( $loc );
1249 :     }
1250 :    
1251 :    
1252 :     sub function_ofS {
1253 :     my( $fig_or_sprout, $peg, $user ) = @_;
1254 :     scalar $fig_or_sprout->function_of( $peg, $user );
1255 :     }
1256 :    
1257 :    
1258 :     sub feature_aliasesL {
1259 :     my( $fig_or_sprout, $fid ) = @_;
1260 :     my @tmp = $fig_or_sprout->feature_aliases( $fid );
1261 :     @tmp
1262 :     }
1263 :    
1264 :    
1265 : overbeek 1.30 sub format_sims {
1266 :     my($fig,$cgi,$html,$sims) = @_;
1267 :     my($col_hdrs,$table,@ids,$ids,$sim,%seen);
1268 :    
1269 :     $col_hdrs = [ "Select up to here",
1270 : parrello 1.75 "Similar sequence",
1271 :     "E-val",
1272 :     "Function",
1273 :     "Organism",
1274 :     "Aliases"
1275 :     ];
1276 : overbeek 1.30
1277 :     $table = [];
1278 :     @ids = ();
1279 :     if (@$sims > 1000) { $#{$sims} = 999 }
1280 :     foreach $sim (@$sims)
1281 :     {
1282 : parrello 1.75 if (! $seen{$sim->[1]})
1283 :     {
1284 :     push(@$table,[$cgi->checkbox(-name => 'list_to', -value => $sim->[1], -override => 1, -checked => 0, -label => ""),
1285 :     &HTML::fid_link($cgi,$sim->[1]),
1286 :     $sim->[10],
1287 :     scalar $fig->function_of($sim->[1]),
1288 : overbeek 1.30 $fig->genus_species(&FIG::genome_of($sim->[1])),
1289 : parrello 1.75 scalar $fig->feature_aliases($sim->[1])
1290 :     ]);
1291 :     push(@ids,$sim->[1]);
1292 :     }
1293 : overbeek 1.30 }
1294 :     $ids = join(",",@ids);
1295 :     my $target = "window$$";
1296 :     push(@$html, $cgi->start_form( -method => 'post',
1297 : parrello 1.75 -target => $target,
1298 :     -action => 'index.cgi'
1299 :     ),
1300 :     $cgi->hidden(-name => 'ids', -value => $ids),
1301 :     &HTML::make_table($col_hdrs,$table,"Best Hits"),
1302 :     $cgi->submit('Extract Matched Sequences'),
1303 :     $cgi->end_form);
1304 : overbeek 1.30 }
1305 : overbeek 1.17
1306 : efrank 1.1 sub verify_db {
1307 :     my($db,$type) = @_;
1308 :    
1309 : overbeek 1.17 if ($type =~ /^p/i)
1310 : efrank 1.1 {
1311 : parrello 1.75 if ((! -s "$db.psq") || (-M "$db.psq" > -M $db))
1312 :     {
1313 :     system "$FIG_Config::ext_bin/formatdb -p T -i $db";
1314 :     }
1315 : efrank 1.1 }
1316 :     else
1317 :     {
1318 : parrello 1.75 if ((! -s "$db.nsq") || (-M "$db.nsq" > -M $db))
1319 :     {
1320 :     system "$FIG_Config::ext_bin/formatdb -p F -i $db";
1321 :     }
1322 : efrank 1.1 }
1323 : parrello 1.75 }
1324 : overbeek 1.7
1325 :     sub export_assignments {
1326 :     my($fig,$cgi,$html,$who) = @_;
1327 :     my($genome,$x);
1328 :    
1329 :     my @genomes = map { $_ =~ /\((\d+\.\d+)\)/; $1 } $cgi->param('korgs');
1330 :    
1331 :     if (@genomes == 0)
1332 :     {
1333 : parrello 1.75 @genomes = $fig->genomes;
1334 : overbeek 1.7 }
1335 :    
1336 : overbeek 1.10 my @assignments = $fig->assignments_made(\@genomes,$who,$cgi->param('after_date'));
1337 : overbeek 1.7 if (@assignments == 0)
1338 :     {
1339 : parrello 1.75 push(@$html,$cgi->h1("Sorry, no assignments where made by $who"));
1340 : overbeek 1.7 }
1341 :     else
1342 :     {
1343 : parrello 1.75 my $col_hdrs = ["FIG id", "External ID", "Genus/Species","Assignment"];
1344 :     my $tab = [];
1345 :     my($x,$peg,$func);
1346 :     foreach $x (@assignments)
1347 :     {
1348 :     ( $peg, $func ) = @$x;
1349 :     push( @$tab,[ HTML::set_prot_links( $cgi, $peg ),
1350 :     HTML::set_prot_links( $cgi, ext_id( $fig, $peg ) ),
1351 :     $fig->genus_species($fig->genome_of($peg)),
1352 :     $func
1353 :     ] );
1354 :     }
1355 :    
1356 :     if ($cgi->param('save_assignments'))
1357 :     {
1358 :     my $user = $cgi->param('save_user');
1359 :     if ($user)
1360 :     {
1361 :     &FIG::verify_dir("$FIG_Config::data/Assignments/$user");
1362 :     my $file = &FIG::epoch_to_readable(time) . ":$who:exported_from_local_SEED";
1363 :     if (open(TMP,">$FIG_Config::data/Assignments/$user/$file"))
1364 :     {
1365 :     print TMP join("",map { join("\t",@$_) . "\n" } map { [$_->[0],$_->[3]] } @$tab);
1366 :     close(TMP);
1367 :     }
1368 :     push(@$html,$cgi->h1("Saved Assignment Set $file"));
1369 :     }
1370 :     else
1371 :     {
1372 :     push(@$html,$cgi->h1("You need to specify a user to save an assignment set"));
1373 :     }
1374 :     }
1375 :    
1376 :     if ($cgi->param('tabs'))
1377 :     {
1378 :     print $cgi->header;
1379 :     print "<pre>\n";
1380 :     print join("",map { join("\t",@$_) . "\n" } @$tab);
1381 :     print "</pre>\n";
1382 :     exit;
1383 :     }
1384 :     else
1385 :     {
1386 :     push(@$html,&HTML::make_table($col_hdrs,$tab,"Assignments Made by $who"));
1387 :     }
1388 : overbeek 1.7 }
1389 :     }
1390 :    
1391 :     sub ext_id {
1392 :     my($fig,$peg) = @_;
1393 :    
1394 :     my @mapped = grep { $_ !~ /^fig/ } map { $_->[0] } $fig->mapped_prot_ids($peg);
1395 :     if (@mapped == 0)
1396 :     {
1397 : parrello 1.75 return $peg;
1398 : overbeek 1.7 }
1399 :    
1400 :     my @tmp = ();
1401 :     if ((@tmp = grep { $_ =~ /^sp/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
1402 :     if ((@tmp = grep { $_ =~ /^pir/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
1403 :     if ((@tmp = grep { $_ =~ /^gi/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
1404 :     if ((@tmp = grep { $_ =~ /^tr/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
1405 :     if ((@tmp = grep { $_ =~ /^tn/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
1406 :     if ((@tmp = grep { $_ =~ /^kegg/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
1407 :    
1408 :     return $peg;
1409 :     }
1410 :    
1411 : overbeek 1.28 sub translate_assignments {
1412 :     my($fig,$cgi,$html,$from_func,$to_func) = @_;
1413 :    
1414 : overbeek 1.56 my @funcs = grep { $_ =~ /^\S.*\S$/ } split(/[\012\015]+/,$from_func);
1415 :    
1416 : overbeek 1.28 my $user = $cgi->param('save_user');
1417 :     if ($user)
1418 :     {
1419 :     &FIG::verify_dir("$FIG_Config::data/Assignments/$user");
1420 :     my $file = &FIG::epoch_to_readable(time) . ":$user:translation";
1421 :     if (open(TMP,">$FIG_Config::data/Assignments/$user/$file"))
1422 :     {
1423 : overbeek 1.55 my($peg,$func);
1424 : overbeek 1.28
1425 : overbeek 1.56 foreach $from_func (@funcs)
1426 : overbeek 1.28 {
1427 : overbeek 1.57 my $from_funcQ = quotemeta $from_func;
1428 :    
1429 : overbeek 1.56 foreach $peg ($fig->seqs_with_role($from_func))
1430 : overbeek 1.28 {
1431 : overbeek 1.56 if ($peg =~ /^fig\|/)
1432 : overbeek 1.28 {
1433 : overbeek 1.56 $func = $fig->function_of($peg);
1434 :     if ($func eq $from_func)
1435 :     {
1436 :     print TMP "$peg\t$to_func\n";
1437 :     }
1438 : overbeek 1.73 else
1439 : overbeek 1.57 {
1440 : overbeek 1.73 my @pieces = grep { $_ } split(/(\s+[\/@]\s+)|(\s*;\s+)/,$func);
1441 :     if (@pieces > 1)
1442 :     {
1443 :     my $func1 = join("",map { $_ =~ s/^$from_funcQ$/$to_func/; $_ } @pieces);
1444 :     if ($func ne $func1)
1445 :     {
1446 :     print TMP "$peg\t$func1\n";
1447 :     }
1448 :     }
1449 : overbeek 1.57 }
1450 : overbeek 1.28 }
1451 :     }
1452 :     }
1453 :     close(TMP);
1454 :     }
1455 :     push(@$html,$cgi->h1("Saved Assignment Set $file"));
1456 :     }
1457 :     else
1458 :     {
1459 : parrello 1.75 push(@$html,$cgi->h1("You need to specify a user to save an assignment set"));
1460 : overbeek 1.28 }
1461 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3