[Bio] / FigWebServices / index.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/index.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.119 - (view) (download)

1 : olson 1.93 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : overbeek 1.49 ### start
19 :    
20 : efrank 1.1 use FIG;
21 : olson 1.89 use FIG_CGI;
22 : efrank 1.1
23 : mkubal 1.109 #use strict;
24 : parrello 1.78 use Tracer;
25 : golsen 1.97 use FIGjs qw( toolTipScript );
26 :     use GenoGraphics qw( render );
27 :     use gjoparseblast qw( next_blast_hsp );
28 : golsen 1.65
29 : efrank 1.92 use URI::Escape; # uri_escape
30 : olson 1.36 use POSIX;
31 : efrank 1.1 use HTML;
32 : golsen 1.65
33 : olson 1.89 my($fig, $cgi, $user);
34 : olson 1.90 my $this_script = "index.cgi";
35 : efrank 1.1
36 : olson 1.54 eval {
37 : olson 1.89 ($fig, $cgi, $user) = FIG_CGI::init(debug_save => 0,
38 :     debug_load => 0,
39 :     print_params => 0);
40 : olson 1.54 };
41 :    
42 :     if ($@ ne "")
43 :     {
44 :     my $err = $@;
45 :    
46 :     my(@html);
47 :    
48 :     push(@html, $cgi->p("Error connecting to SEED database."));
49 :     if ($err =~ /Could not connect to DBI:.*could not connect to server/)
50 :     {
51 : parrello 1.75 push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
52 : olson 1.54 }
53 :     else
54 :     {
55 : parrello 1.75 push(@html, $cgi->pre($err));
56 : olson 1.54 }
57 :     &HTML::show_page($cgi, \@html, 1);
58 :     exit;
59 :     }
60 : olson 1.89
61 : parrello 1.115 ETracing($cgi);
62 : parrello 1.78 Trace("Connected to FIG.") if T(2);
63 : overbeek 1.28 my($map,@orgs,$user,$map,$org,$made_by,$from_func,$to_func);
64 : efrank 1.1
65 : overbeek 1.85 #for my $k (sort keys %ENV)
66 :     #{
67 :     # warn "$k=$ENV{$k}\n";
68 :     #}
69 : overbeek 1.81
70 : efrank 1.1 $ENV{"PATH"} = "$FIG_Config::bin:$FIG_Config::ext_bin:" . $ENV{"PATH"};
71 :    
72 :     my $html = [];
73 : golsen 1.23
74 : overbeek 1.51 my($pattern,$seq_pat,$tool,$ids,$subsearch);
75 : redwards 1.80
76 : overbeek 1.87 my $user = $cgi->param('user');
77 :    
78 : overbeek 1.28 if ($cgi->param('Search for Genes Matching an Occurrence Profile or Common to a Set of Organisms'))
79 : efrank 1.1 {
80 : parrello 1.78 Trace("Gene search chosen.") if T(2);
81 : golsen 1.23 unshift @$html, "<TITLE>The SEED: Phylogenetic Signatures</TITLE>\n";
82 : efrank 1.1 $ENV{"REQUEST_METHOD"} = "GET";
83 :     $ENV{"QUERY_STRING"} = "user=$user";
84 :     my @out = `./sigs.cgi`;
85 : overbeek 1.49 print @out;
86 :     exit;
87 :     }
88 :     #-----------------------------------------------------------------------
89 :     # Statistics for a single organism
90 :     #-----------------------------------------------------------------------
91 :     elsif ($cgi->param('statistics'))
92 :     {
93 : parrello 1.78 Trace("Statistics chosen.") if T(2);
94 : overbeek 1.49 @orgs = $cgi->param('korgs');
95 :     @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
96 :     if (@orgs != 1)
97 :     {
98 : golsen 1.72 unshift @$html, "<TITLE>The SEED Statistics Page</TITLE>\n";
99 : overbeek 1.49 push(@$html,$cgi->h1('Please select a single organism to get statistcs'));
100 :     }
101 :     else
102 :     {
103 : parrello 1.75 $ENV{"REQUEST_METHOD"} = "GET";
104 :     $ENV{"QUERY_STRING"} = "user=$user&genome=$orgs[0]";
105 :     my @out = `./genome_statistics.cgi`;
106 :     print @out;
107 :     exit;
108 : overbeek 1.49 }
109 : efrank 1.1 }
110 : overbeek 1.17 #-----------------------------------------------------------------------
111 : overbeek 1.51 # Locate PEGs in Subsystems
112 :     #-----------------------------------------------------------------------
113 :     elsif ($cgi->param('Find PEGs') && ($subsearch = $cgi->param('subsearch')))
114 :     {
115 : parrello 1.78 Trace("PEG find chosen.") if T(2);
116 : overbeek 1.51 my $genome = $cgi->param('genome');
117 :     my (@pegs,$peg);
118 : overbeek 1.52
119 :     my @poss = $fig->by_alias($subsearch);
120 :     if (@poss > 0) { $subsearch = $poss[0] }
121 :    
122 : overbeek 1.51 if ($subsearch =~ /(fig\|\d+\.\d+\.peg\.\d+)/)
123 :     {
124 : parrello 1.75 # handle searching for homologs that occur in subsystems
125 :     $peg = $1;
126 :     @pegs = ($peg);
127 :     push(@pegs,map { $_->id2 } $fig->sims( $peg, 500, 1.0e-10, "fig"));
128 :     if ($genome)
129 :     {
130 :     my $genomeQ = quotemeta $genome;
131 :     @pegs = grep { $_ =~ /^fig\|$genomeQ/ } @pegs;
132 :     }
133 : overbeek 1.51 }
134 :     else
135 :     {
136 : parrello 1.75 # handle searching for PEGs with functional role in subsystems
137 :     @pegs = $fig->seqs_with_role($subsearch,"master",$genome);
138 : overbeek 1.51 }
139 :    
140 :     print $cgi->header;
141 :     if (@pegs == 0)
142 :     {
143 : parrello 1.75 print $cgi->h1("Sorry, could not even find PEGs to check");
144 : overbeek 1.51 }
145 :     else
146 :     {
147 : parrello 1.75 my(@pairs,$pair,@sub);
148 :     @pairs = map { $peg = $_;
149 :     @sub = $fig->peg_to_subsystems($peg);
150 :     map { [$peg,$_] } @sub } @pegs;
151 :     if (@pairs == 0)
152 :     {
153 :     print $cgi->h1("Sorry, could not map any PEGs to subsystems");
154 :     }
155 :     else
156 :     {
157 :     my($uni,$uni_func);
158 :     my $col_hdrs = ["PEG","Genome","Function","UniProt","UniProt Function","Subsystem"];
159 :     my $tab = [ map { $pair = $_; $uni = $fig->to_alias($pair->[0],"uni");
160 :     ($uni,$uni_func) = $uni ? (&HTML::uni_link($cgi,$uni),scalar $fig->function_of($uni)) : ("","");
161 :     [&HTML::fid_link($cgi,$pair->[0]),
162 :     $fig->org_of($pair->[0]),
163 :     scalar $fig->function_of($pair->[0]),
164 :     $uni,$uni_func,
165 :     &HTML::sub_link($cgi,$pair->[1])] } @pairs];
166 :     print &HTML::make_table($col_hdrs,$tab,"PEGs that Occur in Subsystems");
167 :     }
168 : overbeek 1.51 }
169 :     exit;
170 :     }
171 :     #-----------------------------------------------------------------------
172 : overbeek 1.31 # Align Sequences
173 :     #-----------------------------------------------------------------------
174 :     elsif ($cgi->param('Align Sequences'))
175 :     {
176 : parrello 1.78 Trace("Sequence alignment chosen.");
177 : overbeek 1.31 my $seqs = $cgi->param('seqids');
178 :     $seqs =~ s/^\s+//;
179 :     $seqs =~ s/\s+$//;
180 :     my @seq_ids = split(/[ \t,;]+/,$seqs);
181 :     if (@seq_ids < 2)
182 :     {
183 : parrello 1.75 print $cgi->header;
184 :     print $cgi->h1("Sorry, you need to specify at least two sequence IDs");
185 : overbeek 1.31 }
186 :     else
187 :     {
188 : parrello 1.75 $ENV{"REQUEST_METHOD"} = "GET";
189 :     $_ = join('&checked=',@seq_ids);
190 :     $ENV{"QUERY_STRING"} = "user=$user&align=1&checked=" . $_;
191 :     my @out = `./fid_checked.cgi`;
192 :     print join("",@out);
193 : overbeek 1.31 }
194 :     exit;
195 :     }
196 :     #-----------------------------------------------------------------------
197 : overbeek 1.17 # Search (text) || Find Genes in Org that Might Play the Role
198 :     #-----------------------------------------------------------------------
199 : golsen 1.59 elsif ( ( $pattern = $cgi->param('pattern') )
200 :     && ( $cgi->param('Search')
201 :     || $cgi->param('Search genome selected below')
202 : redwards 1.63 || $cgi->param('Search Selected Organisms')
203 : golsen 1.59 || $cgi->param('Find Genes in Org that Might Play the Role')
204 :     )
205 :     )
206 : efrank 1.1 {
207 : parrello 1.78 Trace("Pattern search chosen.") if T(2);
208 : overbeek 1.17 # Remove leading and trailing spaces from pattern -- GJO:
209 :     $pattern =~ s/^\s+//;
210 :     $pattern =~ s/\s+$//;
211 : efrank 1.1 if ($cgi->param('Find Genes in Org that Might Play the Role') &&
212 : parrello 1.75 (@orgs = $cgi->param('korgs')) && (@orgs == 1))
213 : efrank 1.1 {
214 : parrello 1.75 unshift @$html, "<TITLE>The SEED: Genes in that Might Play Specific Role</TITLE>\n";
215 :     @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
216 :     $ENV{"REQUEST_METHOD"} = "GET";
217 :     $ENV{"QUERY_STRING"} = "user=$user&request=find_in_org&role=$pattern&org=$orgs[0]";
218 :     my @out = `./pom.cgi`;
219 :     print join("",@out);
220 :     exit;
221 : efrank 1.1 }
222 :     else
223 :     {
224 : parrello 1.75 unshift @$html, "<TITLE>The SEED: Search Results</TITLE>\n";
225 :     &show_indexed_objects($fig, $cgi, $html, $pattern);
226 : efrank 1.1 }
227 :     }
228 : overbeek 1.17 #-----------------------------------------------------------------------
229 :     # Metabolic Overview
230 :     #-----------------------------------------------------------------------
231 : efrank 1.1 elsif (($map = $cgi->param('kmap')) && $cgi->param('Metabolic Overview'))
232 :     {
233 : parrello 1.78 Trace("Metabolic overview chosen.") if T(2);
234 : olson 1.38 if ($map =~ /\(([^)]*)\)$/)
235 :     {
236 : parrello 1.75 $map = $1;
237 : olson 1.38 }
238 :     else
239 :     {
240 : parrello 1.75 # ??? Gary ???
241 : olson 1.38 }
242 :    
243 :     #$map =~ s/^.*\((MAP\d+)\).*$/$1/;
244 : efrank 1.1 @orgs = $cgi->param('korgs');
245 :     @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
246 :     $ENV{"REQUEST_METHOD"} = "GET";
247 :     if (@orgs > 0)
248 :     {
249 : parrello 1.75 $ENV{"QUERY_STRING"} = "user=$user&map=$map&org=$orgs[0]";
250 : efrank 1.1 }
251 :     else
252 :     {
253 : parrello 1.75 $ENV{"QUERY_STRING"} = "user=$user&map=$map";
254 : efrank 1.1 }
255 :    
256 : golsen 1.23 unshift @$html, "<TITLE>The SEED: Metabolic Overview</TITLE>\n";
257 : olson 1.38 my @out = `./show_map.cgi`;
258 : efrank 1.1 &HTML::trim_output(\@out);
259 : golsen 1.23 push( @$html, "<br>\n", @out );
260 : efrank 1.1 }
261 : overbeek 1.17
262 :     #-----------------------------------------------------------------------
263 :     # Search for Matches (sequence or pattern)
264 :     #-----------------------------------------------------------------------
265 : efrank 1.1 elsif (($seq_pat = $cgi->param('seq_pat')) &&
266 :     ($tool = $cgi->param('Tool')) &&
267 :     $cgi->param('Search for Matches'))
268 :     {
269 : parrello 1.78 Trace("Match search chosen.") if T(2);
270 : overbeek 1.30 @orgs = $cgi->param('korgs');
271 :     if (@orgs > 0)
272 :     {
273 : parrello 1.75 @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
274 : overbeek 1.30 }
275 :     else
276 :     {
277 : parrello 1.75 @orgs = ("");
278 : overbeek 1.30 }
279 :    
280 : efrank 1.1 if ($tool =~ /blast/)
281 :     {
282 : parrello 1.75 unshift @$html, "<TITLE>The SEED: BLAST Search Results</TITLE>\n";
283 :     &run_blast($fig,$cgi,$html,$orgs[0],$tool,$seq_pat);
284 : efrank 1.1 }
285 :     elsif ($tool =~ /Protein scan_for_matches/)
286 :     {
287 : parrello 1.75 unshift @$html, "<TITLE>The SEED: Protein Pattern Match Results</TITLE>\n";
288 :     &run_prot_scan_for_matches($fig,$cgi,$html,$orgs[0],$seq_pat);
289 : efrank 1.1 }
290 :     elsif ($tool =~ /DNA scan_for_matches/)
291 :     {
292 : parrello 1.75 unshift @$html, "<TITLE>The SEED: Nucleotide Pattern Match Results</TITLE>\n";
293 :     &run_dna_scan_for_matches($fig,$cgi,$html,$orgs[0],$seq_pat);
294 : efrank 1.1 }
295 :     }
296 : overbeek 1.7 elsif (($made_by = $cgi->param('made_by')) && $cgi->param('Extract Assignments'))
297 :     {
298 : parrello 1.78 Trace("Assignment export chosen.") if T(2);
299 : overbeek 1.7 &export_assignments($fig,$cgi,$html,$made_by);
300 :     }
301 : overbeek 1.28 elsif ($cgi->param('Generate Assignments via Translation') &&
302 :     ($from_func = $cgi->param('from_func')) &&
303 :     ($to_func = $cgi->param('to_func')))
304 :     {
305 : parrello 1.78 Trace("Assignment translate chosen.") if T(2);
306 : overbeek 1.28 &translate_assignments($fig,$cgi,$html,$from_func,$to_func);
307 :     }
308 : golsen 1.96
309 : overbeek 1.30 elsif ($cgi->param('Extract Matched Sequences') && ($ids = $cgi->param('ids')))
310 :     {
311 : parrello 1.78 Trace("Matched sequence extract chosen.") if T(2);
312 : overbeek 1.30 my @ids = split(/,/,$ids);
313 : golsen 1.96
314 :     # Truncate the list if requested:
315 :    
316 : overbeek 1.30 my($list_to,$i);
317 :     if ($list_to = $cgi->param('list_to'))
318 :     {
319 : parrello 1.75 for ($i=0; ($i < @ids) && ($ids[$i] ne $list_to); $i++) {}
320 :     if ($i < @ids)
321 :     {
322 :     $#ids = $i;
323 :     }
324 : overbeek 1.30 }
325 :    
326 : golsen 1.96 # Print the sequences:
327 :     # Add organisms -- GJO
328 : overbeek 1.30
329 : golsen 1.96 my( $id, $seq, $desc, $func, $org );
330 :     push( @$html, $cgi->pre );
331 : overbeek 1.30 foreach $id (@ids)
332 :     {
333 : parrello 1.75 if ($seq = $fig->get_translation($id))
334 :     {
335 : golsen 1.96 $desc = $id;
336 :     if ( $func = $fig->function_of( $id ) )
337 :     {
338 :     $desc .= " $func";
339 :     }
340 :     if ( $org = $fig->genus_species( $fig->genome_of( $id ) ) )
341 :     {
342 :     $desc .= " [$org]" if $org;
343 :     }
344 :     push( @$html, ">$desc\n" );
345 : parrello 1.75 for ($i=0; ($i < length($seq)); $i += 60)
346 :     {
347 : golsen 1.96 # substr does not mind a request for more than length
348 :     push( @$html, substr( $seq, $i, 60 ) . "\n" );
349 : parrello 1.75 }
350 :     }
351 : overbeek 1.30 }
352 :     push(@$html,$cgi->end_pre);
353 :     }
354 : overbeek 1.17
355 :     #-----------------------------------------------------------------------
356 :     # Initial search page
357 :     #-----------------------------------------------------------------------
358 : efrank 1.1 else
359 :     {
360 : parrello 1.78 Trace("SEED Entry page chosen.") if T(2);
361 : golsen 1.23 unshift @$html, "<TITLE>The SEED: Entry Page</TITLE>\n";
362 : efrank 1.1 &show_initial($fig,$cgi,$html);
363 :     }
364 : parrello 1.115 Trace("Showing page.") if T(3);
365 : efrank 1.1 &HTML::show_page($cgi,$html,1);
366 : parrello 1.115 Trace("Page shown.") if T(3);
367 : overbeek 1.49 exit;
368 : efrank 1.1
369 : overbeek 1.17
370 :     #==============================================================================
371 :     # Initial page (alias search)
372 :     #==============================================================================
373 :    
374 : efrank 1.1 sub show_initial {
375 :     my($fig,$cgi,$html) = @_;
376 :     my($map,$name,$olrg,$gs);
377 :    
378 : overbeek 1.83
379 :     #
380 :     # Display the message of the day, if present.
381 :     #
382 :    
383 :     show_motd($fig, $cgi, $html);
384 :    
385 : golsen 1.113 # The original $a and $b conflicted with explicit sort variables (ouch):
386 :     # "Can't use "my $a" in sort comparison" -- GJO
387 :    
388 :     my( $at, $bt, $et, $v, $envt ) = $fig->genome_counts;
389 : parrello 1.115 push(@$html,$cgi->h2("Contains $at archaeal, $bt bacterial, $et eukaryal, $v viral and $envt environmental genomes"));
390 : golsen 1.113 my( $ac, $bc, $ec ) = $fig->genome_counts("complete");
391 :     push(@$html,$cgi->h2("Of these, $ac archaeal, $bc bacterial and $ec eukaryal genomes are more-or-less complete"),$cgi->hr);
392 : efrank 1.1
393 :     push(@$html,
394 : parrello 1.75 $cgi->h2('Work on Subsystems'),
395 : overbeek 1.46
396 : parrello 1.75 # $cgi->start_form(-action => "ssa2.cgi"),
397 :     # "Enter user: ",
398 :     # $cgi->textfield(-name => "user", -size => 20),
399 :     # $cgi->submit('Work on Subsystems'),
400 :     # $cgi->end_form,
401 :    
402 :     # $cgi->h2('Work on Subsystems Using New, Experimental Code'),
403 : overbeek 1.111 # "This is the <i>new</i> subsystems code, and is now officially released.",
404 : parrello 1.75 $cgi->start_form(-action => "subsys.cgi"),
405 :     "Enter user: ",
406 :     $cgi->textfield(-name => "user", -size => 20),
407 :     $cgi->submit('Work on Subsystems'),
408 :     $cgi->end_form,
409 :     $cgi->hr,
410 : overbeek 1.114 $cgi->start_form(-action => "ff.cgi"),
411 :     "Enter user: ",
412 :     $cgi->textfield(-name => "user", -size => 20),
413 :     $cgi->submit('Work on FIGfams'),
414 :     $cgi->end_form,
415 :     $cgi->hr,
416 : parrello 1.75 );
417 : olson 1.42
418 : golsen 1.95 push( @$html,
419 :     $cgi->start_form(-action => $this_script),
420 :     "<table>\n",
421 :     "<tr>",
422 : golsen 1.100 "<td colspan=2>", $cgi->h2('Searching for Genes or Functional Roles Using Text'), "</td>",
423 :     "<td align=right><a href='sdk_uniprot_search.cgi'>UniProt WebService Search</a></td>",
424 :     "</tr>\n",
425 :     "<tr>",
426 : golsen 1.95 "<td>Search Pattern: </td>",
427 :     "<td>", $cgi->textfield(-name => "pattern", -size => 65), "</td>",
428 :     "<td>", "Search <select name=search_kind>
429 :     <option value=DIRECT >Directly</option>
430 :     <option value=GO >Via Gene Ontology</option>
431 :     <option value=HUGO >Via HUGO Gene Nomenclature Committee</option>
432 :     </select></td>",
433 :     "</tr>\n",
434 :     "<tr>",
435 :     "<td>User ID:</td>",
436 :     "<td>",
437 :     $cgi->textfield(-name => "user", -size => 20), " [optional] &nbsp; &nbsp; ",
438 :     "Max Genes: ", $cgi->textfield(-name => "maxpeg", -size => 6, -value => 100), "&nbsp; &nbsp; ",
439 :     "Max Roles: ", $cgi->textfield(-name => "maxrole", -size => 6, -value => 100), "</td>",
440 :     "<td>", $cgi->checkbox(-name => "substring_match", -label => 'Allow substring match'), "</td>",
441 :     "</tr>\n",
442 :     "</table>\n",
443 :     $cgi->submit('Search'),
444 :     $cgi->submit('Search genome selected below'),
445 :     $cgi->reset('Clear'),
446 :     $cgi->hr
447 :     );
448 : olson 1.41
449 : golsen 1.113 #---------------------------------------------------------------------------
450 :     # Build the list of genomes from which the user can pick:
451 :     #---------------------------------------------------------------------------
452 :    
453 :     my $link;
454 :     ( $link = $cgi->url(-relative => 1) ) =~ s/index\.cgi/show_log.cgi/;
455 :    
456 :     push( @$html, $cgi->h2('If You Need to Pick a Genome for Options Below'),"&nbsp;[<a href=$link>Log</a>]");
457 :    
458 :     my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Plasmids', 'Viruses', 'Environmental samples' );
459 : golsen 1.47
460 :     # Canonical names must match the keywords used in the DBMS. They are
461 :     # defined in compute_genome_counts.pl
462 : golsen 1.113
463 : golsen 1.47 my %canonical = (
464 :     'All' => undef,
465 :     'Archaea' => 'Archaea',
466 :     'Bacteria' => 'Bacteria',
467 :     'Eucarya' => 'Eukaryota',
468 : golsen 1.113 'Plasmids' => 'Plasmid',
469 : golsen 1.47 'Viruses' => 'Virus',
470 :     'Environmental samples' => 'Environmental Sample'
471 :     );
472 :    
473 :     my $req_dom = $cgi->param( 'domain' ) || 'All';
474 :     my @domains = $cgi->radio_group( -name => 'domain',
475 :     -default => $req_dom,
476 :     -override => 1,
477 :     -values => [ @display ]
478 :     );
479 :    
480 :     my $n_domain = 0;
481 : golsen 1.113 my %dom_num = map { ( $_ => $n_domain++ ) } @display;
482 : golsen 1.47 my $req_dom_num = $dom_num{ $req_dom } || 0;
483 :    
484 : golsen 1.113 # Plasmids, Viruses and Environmental samples must have completeness
485 :     # = All (that is how they are in the database). Otherwise, default is
486 :     # Only "complete".
487 :    
488 : golsen 1.47 my $req_comp = ( $req_dom_num > $dom_num{ 'Eucarya' } ) ? 'All'
489 :     : $cgi->param( 'complete' ) || 'Only "complete"';
490 :     my @complete = $cgi->radio_group( -name => 'complete',
491 :     -default => $req_comp,
492 :     -override => 1,
493 :     -values => [ 'All', 'Only "complete"' ]
494 :     );
495 : golsen 1.113
496 : golsen 1.47 # Use $fig->genomes( complete, restricted, domain ) to get org list:
497 : golsen 1.113
498 : golsen 1.47 my $complete = ( $req_comp =~ /^all$/i ) ? undef : "complete";
499 : golsen 1.113
500 : overbeek 1.107 my @orgs;
501 :     foreach my $org ($fig->genomes( $complete, undef, $canonical{ $req_dom } ))
502 :     {
503 :     my $gs = $fig->genus_species($org);
504 :     if ($fig->genome_domain($org) ne "Environmental Sample")
505 :     {
506 :     my $gc=$fig->number_of_contigs($org);
507 :     push @orgs, "$gs ($org) [$gc contigs]";
508 :     }
509 :     else
510 :     {
511 :     push @orgs, "$gs ($org)";
512 :     }
513 :     }
514 : golsen 1.113
515 :     # Make the sort case independent -- GJO
516 :    
517 :     # @orgs = sort { $a cmp $b } @orgs;
518 :     @orgs = sort { lc( $a ) cmp lc( $b ) } @orgs;
519 : overbeek 1.107
520 :    
521 : golsen 1.47
522 :     my $n_genomes = @orgs;
523 :    
524 : golsen 1.113 push( @$html, "<TABLE>\n",
525 :     " <TR VAlign=top>\n",
526 :     " <TD>",
527 : parrello 1.75 $cgi->scrolling_list( -name => 'korgs',
528 : golsen 1.47 -values => [ @orgs ],
529 : redwards 1.63 -size => 10,
530 : golsen 1.60 ), $cgi->br,
531 : golsen 1.47 "$n_genomes genomes shown ",
532 : golsen 1.60 $cgi->submit( 'Update List' ), $cgi->reset, $cgi->br,
533 : parrello 1.75 "Show some ", $cgi->submit('statistics')," of the selected genome",
534 : golsen 1.113 " </TD>",
535 :    
536 :     " <TD><b>Domain(s) to show:</b>\n",
537 :     " <TABLE>\n",
538 :     " <TR VAlign=bottom>\n",
539 :     " <TD>", join( "<br>", @domains[0..3]), "</TD>\n",
540 :     " <TD>&nbsp;&nbsp;&nbsp;</TD>\n",
541 :     " <TD>", join( "<br>", @domains[4..$#domains]), "</TD>\n",
542 :     " </TR>\n",
543 :     " </TABLE>\n",
544 :     " ", join( "<br>", "<b>Completeness?</b>", @complete), "\n",
545 :     " </TD>",
546 :     " </TR>\n",
547 : golsen 1.47 "</TABLE>\n",
548 : golsen 1.60 $cgi->hr
549 :     );
550 : overbeek 1.49
551 : overbeek 1.112
552 : golsen 1.47 push( @$html, $cgi->h2('Finding Candidates for a Functional Role'),
553 : parrello 1.75 "Make sure that you type the functional role you want to search for in the Search Pattern above",
554 :     $cgi->br,
555 :     $cgi->submit('Find Genes in Org that Might Play the Role'),
556 :     $cgi->hr);
557 : overbeek 1.17
558 : golsen 1.60 my @maps = sort map { $map = $_; $name = $fig->map_name($map); "$name ($map)" } $fig->all_maps;
559 :    
560 : golsen 1.47 push( @$html, $cgi->h2('Metabolic Overviews and Subsystem Maps (via KEGG & SEED) - Choose Map'),
561 : parrello 1.75 $cgi->submit('Metabolic Overview'),
562 :     $cgi->br,
563 :     $cgi->br,
564 :     $cgi->scrolling_list(-name => 'kmap',
565 : efrank 1.1 -values => [@maps],
566 : parrello 1.75 -size => 10
567 : efrank 1.1 ),
568 : parrello 1.75 $cgi->hr);
569 : overbeek 1.17
570 : golsen 1.47 push( @$html, $cgi->h2('Searching DNA or Protein Sequences (in a selected organism)'),
571 : parrello 1.75 "<TABLE>\n",
572 :     " <TR>\n",
573 :     " <TD>Sequence/Pattern: </TD>",
574 :     " <TD Colspan=3>", $cgi->textarea(-name => 'seq_pat', -rows => 10, -cols => 70), "</TD>\n",
575 :     " </TR>\n",
576 :     " <TR>\n",
577 :     " <TD>Search Program: </TD>",
578 :     " <TD>", $cgi->popup_menu(-name => 'Tool', -values => ['blastp', 'blastx', 'blastn', 'tblastn', 'blastp against complete genomes', 'Protein scan_for_matches', 'DNA scan_for_matches'], -default => 'blastp'), " </TD>",
579 :     " <TD> Program Options:</TD>",
580 :     " <TD>", $cgi->textfield( -name => "blast_options", -size => 27 ), "</TD>",
581 :     " </TR>\n",
582 :     "</TABLE>\n",
583 :     $cgi->submit('Search for Matches'),
584 :     $cgi->hr);
585 : overbeek 1.17
586 : olson 1.41 #
587 :     # Make assignment export tbl.
588 :     #
589 :    
590 :     my @atbl;
591 : golsen 1.64 push(@atbl, [ "Extract assignments made by ",
592 : parrello 1.75 $cgi->textfield(-name => "made_by", -size => 25) . " (do not prefix with <b>master:</b>)" ]);
593 : golsen 1.64 push(@atbl, [ "Save as user: ",
594 : parrello 1.75 $cgi->textfield(-name => "save_user", -size => 25) . " (do not prefix with <b>master:</b>)" ] );
595 : golsen 1.64 push(@atbl, [ "After date (MM/DD/YYYY) ",
596 : parrello 1.75 $cgi->textfield(-name => "after_date", -size => 15)]);
597 : olson 1.41
598 :     push(@$html,
599 : overbeek 1.84 $cgi->h2($cgi->a({name => "exporting_assignments"}, 'Exporting Assignments')),
600 : parrello 1.75 &HTML::make_table(undef, \@atbl, '', border => 0),
601 :     $cgi->checkbox(-label => 'Tab-delimited Spreadsheet', -name => 'tabs', -value => 1),
602 :     $cgi->br,
603 :     $cgi->checkbox(-label => 'Save Assignments', -name => 'save_assignments', -value => 1),
604 :     $cgi->br,
605 :     $cgi->submit('Extract Assignments'),
606 :     $cgi->br, $cgi->br, $cgi->br,
607 :     "Alternatively, you can generate a set of assignments as translations of existing assignments. ",
608 :     "To do so, you need to make sure that you fill in the <b>Save as user</b> field just above. You ",
609 :     "should use something like <b>RossO</b> (leave out the <b>master:</b>). When you look at the assignments (and decide which ",
610 :     "to actually install), they will be made available under that name (but, when you access them, ",
611 :     "you will normally be using something like <b>master:RossO</b>)",
612 :     $cgi->br,$cgi->br,
613 :     "From: ",
614 :     $cgi->textarea(-name => 'from_func', -rows => 4, -cols => 100),
615 :     $cgi->br,$cgi->br,
616 :     "To:&nbsp;&nbsp;&nbsp;&nbsp; ",$cgi->textfield(-name => "to_func", -size => 100),
617 :     $cgi->br,
618 : golsen 1.103 "<TABLE Width=100%><TR><TD>",
619 :     $cgi->submit('Generate Assignments via Translation'),
620 :     "</TD><TD NoWrap Width=1%>",
621 : overbeek 1.76 $cgi->a({class=>"help", target=>"help", href=>"Html/seedtips.html#replace_names"}, "Help with generate assignments via translation"),
622 : golsen 1.103 "</TD></TR></TABLE>\n"
623 : golsen 1.95 );
624 :    
625 :     push(@$html,
626 : parrello 1.75 $cgi->hr,
627 :     $cgi->h2('Searching for Interesting Genes'),
628 :     $cgi->submit('Search for Genes Matching an Occurrence Profile or Common to a Set of Organisms'),
629 : overbeek 1.17 $cgi->end_form
630 : parrello 1.75 );
631 : overbeek 1.14
632 :     push(@$html,
633 : parrello 1.75 $cgi->hr,
634 :     $cgi->h2('Process Saved Assignments Sets'),
635 :     $cgi->start_form(-action => "assignments.cgi"),
636 :     "Here you should include the <b>master:</b>. Thus use something like <b>master:RossO</b>",$cgi->br,
637 :     $cgi->br,
638 :     "Enter user: ",
639 :     $cgi->textfield(-name => "user", -size => 20),
640 :     $cgi->submit('Process Assignment Sets'),
641 : overbeek 1.17 $cgi->end_form
642 : parrello 1.75 );
643 : efrank 1.1
644 : overbeek 1.19 push(@$html,
645 : parrello 1.75 $cgi->hr,
646 : overbeek 1.112 $cgi->h2('Locate clustered genes not in subsystems'),
647 :     $cgi->start_form(-action => "find_ss_genes.cgi"),
648 :     $cgi->br,
649 :     "Enter user: ",
650 :     $cgi->textfield(-name => "user", -size => 20),
651 :     $cgi->submit('Find Clustered Genes'),
652 :     $cgi->end_form
653 :     );
654 :    
655 :     push(@$html,
656 :     $cgi->hr,
657 : parrello 1.75 $cgi->h2('Align Sequences'),
658 : olson 1.89 $cgi->start_form(-action => $this_script),
659 : parrello 1.75 "Enter user: ",
660 :     $cgi->textfield(-name => "user", -size => 20), $cgi->br,
661 :     $cgi->submit('Align Sequences'),": ",
662 :     $cgi->textfield(-name => "seqids", -size => 100),
663 : overbeek 1.31 $cgi->end_form
664 : parrello 1.75 );
665 : overbeek 1.51
666 :     push(@$html,
667 : parrello 1.75 $cgi->hr,
668 :     $cgi->h2('Locate PEGs in Subsystems'),
669 :     "If you wish to locate PEGs in subsystems, you have two approaches supported. You can
670 : overbeek 1.56 give a FIG id, and you will get a list of all homologs in the designated genome that occur in subsystems.
671 :     Alternatively, you can specify a functional role, and all PEGs in the genome that match that role will be shown.",
672 : olson 1.89 $cgi->start_form(-action => $this_script),
673 : parrello 1.75 "Enter user: ",
674 :     $cgi->textfield(-name => "user", -size => 20), $cgi->br,
675 :     $cgi->br,"Genome: ",$cgi->textfield(-name => "genome", -size => 15),$cgi->br,
676 :     "Search: ",$cgi->textfield(-name => "subsearch", -size => 100),$cgi->br,
677 :     $cgi->submit('Find PEGs'),": ",
678 : overbeek 1.51 $cgi->end_form
679 : parrello 1.75 );
680 : overbeek 1.116 push(@$html,
681 :     $cgi->hr,
682 :     $cgi->h2('Compare Metabolic Reconstructions'),
683 :     "If you wish to compare the reconstructions for two distinct genomes, use this tool.
684 :     You should specify two genomes, or a P1K server output directory (as genome1) and a second genome (which
685 :     must be a valid genome ID that exists in this SEED). You can ask for functional roles/subsystems that the
686 :     genomes have in common, those that exist in genome1 only, or those that exist in only genome2.",
687 :     $cgi->start_form(-action => 'comp_MR.cgi'),
688 :     "Enter user: ",
689 :     $cgi->textfield(-name => "user", -size => 20), $cgi->br,
690 :     $cgi->br,"Genome1: ",$cgi->textfield(-name => "genome1", -size => 40),$cgi->br,
691 :     $cgi->br,"Genome2: ",$cgi->textfield(-name => "genome2", -size => 15),
692 :     $cgi->scrolling_list( -name => 'request',
693 :     -values => [ 'common', 'in1_not2','in2_not1' ],
694 :     -size => 3,
695 :     ), $cgi->br,
696 :     $cgi->submit('Compare Reconstructions'),": ",
697 :     $cgi->end_form
698 :     );
699 : overbeek 1.117
700 :     push(@$html,
701 :     $cgi->hr,
702 :     $cgi->h2('Compare Genomes'),
703 :     "If you wish to compare the contents of several genomes, you can use this tool.
704 :     Choose a set of genomes (at least two).<br><br> ",
705 :     $cgi->start_form(-action => 'comp_genomes.cgi'),
706 :     "Enter user: ",
707 :     $cgi->textfield(-name => "user", -size => 20), $cgi->br,
708 :     $cgi->scrolling_list( -name => 'comp_orgs',
709 :     -values => [ @orgs ],
710 :     -size => 10,
711 :     -multiple => 1,
712 :     ), $cgi->br,$cgi->br,
713 : overbeek 1.118 "<br><br>",
714 : overbeek 1.117 "Optionally, you can select a PEG and window size to limit the comparison:<br>",
715 :     "PEG: ", $cgi->textfield(-name => "peg", -size => 20), $cgi->br,
716 :     "Window Size: ", $cgi->textfield(-name => "sz", -size => 8, -value => 20000), $cgi->br,
717 :    
718 : overbeek 1.119 $cgi->submit('Compare Genomes'),
719 :     $cgi->submit('Update Functions in MouseOvers'),"<br>",
720 : overbeek 1.117 $cgi->end_form
721 :     );
722 : efrank 1.1 }
723 :    
724 : overbeek 1.83 #
725 :     # Show a message of the day if it's present.
726 :     #
727 :     sub show_motd
728 :     {
729 :     my($fig, $cgi, $html) = @_;
730 :    
731 :     my $motd_file = "$FIG_Config::fig_disk/config/motd";
732 :    
733 :     if (open(F, "<$motd_file"))
734 :     {
735 :     push(@$html, "<p>\n");
736 :     while (<F>)
737 :     {
738 :     push(@$html, $_);
739 :     }
740 :     close(F);
741 :     push(@$html, "<hr>\n");
742 :     }
743 :     }
744 : overbeek 1.17
745 :     #==============================================================================
746 :     # Indexed objects (text search)
747 :     #==============================================================================
748 :    
749 : efrank 1.1 sub show_indexed_objects {
750 : golsen 1.22 my($fig, $cgi, $html, $pattern) = @_;
751 :     my($msg, $i);
752 : efrank 1.1
753 :     if ($pattern =~ /^\s*(fig\|\d+\.\d+\.peg\.\d+)\s*$/)
754 :     {
755 : parrello 1.75 my $peg = $1;
756 :     my $user = $cgi->param('user');
757 :     $user = $user ? $user : "";
758 : paczian 1.102 my @prot_out;
759 :     if (defined($cgi->param('fromframe'))) {
760 :     $ENV{'REQUEST_METHOD'} = "GET";
761 :     $ENV{"QUERY_STRING"} = "prot=$peg\&user=$user\&action=proteinpage";
762 :     $ENV{"REQUEST_URI"} =~ s/$this_script/frame.cgi/;
763 :     @prot_out = TICK("./frame.cgi");
764 :     } else {
765 :     $ENV{'REQUEST_METHOD'} = "GET";
766 :     $ENV{"QUERY_STRING"} = "prot=$peg\&user=$user";
767 :     $ENV{"REQUEST_URI"} =~ s/$this_script/protein.cgi/;
768 :     @prot_out = TICK("./protein.cgi");
769 :     }
770 : parrello 1.75 print @prot_out;
771 :     exit;
772 : efrank 1.1 }
773 : overbeek 1.71 $pattern =~ s/([a-zA-Z0-9])\|([a-zA-Z0-9])/$1\\\|$2/ig;
774 : efrank 1.92
775 :     my $search_kind = $cgi->param("search_kind");
776 :     if ( $search_kind && ! ($search_kind eq "DIRECT") ) {
777 :     #otherwise $search_kind is name of controlled vocab
778 :     find_pegs_by_cv($fig, $cgi, $html, $user, $pattern, $search_kind);
779 :     return;
780 :     }
781 :    
782 : overbeek 1.17 push( @$html, $cgi->br );
783 : olson 1.70 my( $peg_index_data, $role_index_data ) = $fig->search_index($pattern, $cgi->param("substring_match") eq "on");
784 : overbeek 1.17 my $maxpeg = defined( $cgi->param("maxpeg") ) ? $cgi->param("maxpeg") : 100;
785 :     my $maxrole = defined( $cgi->param("maxrole") ) ? $cgi->param("maxrole") : 100;
786 : mkubal 1.109 my $output_file = "$FIG_Config::temp/search_results.txt";
787 : parrello 1.115 Trace("Producing search output file $output_file") if T(3);
788 : mkubal 1.109 open(OUT,">$output_file");
789 : overbeek 1.17
790 : redwards 1.53 # RAE added lines to allow searching within a single organism
791 : golsen 1.59 # if ($cgi->param('korgs'))
792 :     # {
793 :     # $cgi->param('korgs') =~ /\((\d+\.*\d*)\)/;
794 :     # $org=$1; # this should be undef if korgs is not defined
795 :    
796 :     # push (@$html, $cgi->br, "Matches found in ",$cgi->param('korgs'), $cgi->p);
797 :     # my @clean_data; my @clean_index;
798 :     # while (@$peg_index_data)
799 :     # {
800 :     # my ($data, $index)=(shift @$peg_index_data, shift @$role_index_data);
801 :     # next unless (${$data}[0] =~ /^fig\|$org\.peg/);
802 :     # push @clean_data, $data;
803 :     # push @clean_index, $index;
804 :     # }
805 :    
806 :     # @$peg_index_data=@clean_data;
807 :     # @$role_index_data=@clean_index;
808 :     # }
809 :     ## End of added lines
810 : redwards 1.53
811 : mkubal 1.99 # RAE version with separate submit buttoxns and more than one org in korg
812 : redwards 1.63 # this is used by organisms.cgi for group specific searches
813 :     if ( $cgi->param('korgs') && $cgi->param('Search Selected Organisms')
814 :     )
815 :     {
816 :     my @temp;
817 :     foreach my $org ($cgi->param('korgs'))
818 :     {
819 :     push @temp, grep { $_->[0] =~ /^fig\|$org/ } @$peg_index_data;
820 :     }
821 :     @$peg_index_data = @temp;
822 :     }
823 :    
824 : golsen 1.59 # GJO version with separate submit buttons
825 : redwards 1.53
826 : golsen 1.59 if ( $cgi->param('korgs') && $cgi->param('korgs') =~ /\((\d+\.*\d*)\)/
827 :     && $cgi->param('Search genome selected below')
828 :     )
829 :     {
830 : parrello 1.75 my $org = $1;
831 :     push @$html, $cgi->br, "Matches found in ",$cgi->param('korgs'), $cgi->p;
832 : mkubal 1.79 @$peg_index_data = grep { $_->[0] =~ /^fig\|$org\.*/ } @$peg_index_data;
833 : redwards 1.53 }
834 : parrello 1.115 Trace("Initial push.") if T(3);
835 : golsen 1.59 if ( ( $maxpeg > 0 ) && @$peg_index_data )
836 : overbeek 1.17 {
837 : parrello 1.75 # RAE: Added javascript buttons see below. Only two things are needed.
838 :     # The form must have a name parameter, and the one line of code for the
839 :     # buttons. Everything else is automatic
840 :    
841 :     push( @$html, $cgi->start_form( -method => 'post',
842 :     -target => "window$$",
843 :     -action => 'fid_checked.cgi',
844 :     -name => 'found_pegs'
845 :     ),
846 :     $cgi->hidden(-name => 'user', -value => $user),
847 :     "For Selected (checked) sequences: ",
848 :     $cgi->submit('get sequences'),
849 :     $cgi->submit('view annotations'),
850 :     $cgi->submit('assign/annotate'),
851 :     $cgi->param('SPROUT') ? () : $cgi->submit('view similarities'),
852 :     $cgi->br, $cgi->br
853 :     );
854 : efrank 1.1
855 : redwards 1.63 # RAE Add the check all/uncheck all boxes.
856 :     push (@$html, $cgi->br, &HTML::java_buttons("found_pegs", "checked"), $cgi->br);
857 : parrello 1.75
858 :     my $n = @$peg_index_data;
859 :     if ($n > $maxpeg)
860 :     {
861 :     $msg = "Showing first $maxpeg out of $n protein genes";
862 :     $#{$peg_index_data} = $maxpeg-1;
863 :     }
864 :     else
865 :     {
866 : mkubal 1.79 $msg = "Showing $n FEATURES";
867 : parrello 1.75 }
868 :    
869 : parrello 1.115 my $col_hdrs = ["Sel","FEATURE","Organism","Aliases","Functions","Who","Attributes"];
870 : overbeek 1.106 my $tab = [ map { format_peg_entry( $fig, $cgi, $_ ) } sort {$a->[1] cmp $b->[1]} @$peg_index_data ];
871 : mkubal 1.109
872 :     my $tab2 = [ sort {$a->[1] cmp $b->[1]} @$peg_index_data ];
873 : parrello 1.115 Trace("Final html push.") if T(3);
874 : mkubal 1.109 push( @$html,$cgi->br,
875 :     "<a href=$FIG_Config::temp_url/search_results.txt>Download_Search_Results</a>",
876 :     &HTML::make_table($col_hdrs,$tab,$msg),
877 : parrello 1.75 $cgi->br,
878 :     "For SELECTed (checked) sequences: ",
879 :     $cgi->submit('get sequences'),
880 :     $cgi->submit('view annotations'),
881 :     $cgi->submit('assign/annotate'),
882 :     $cgi->param('SPROUT') ? () : $cgi->submit('view similarities'),
883 :     $cgi->br,
884 :     $cgi->end_form
885 : mkubal 1.109 );
886 :    
887 :     foreach my $t (@$tab2){
888 :     my $string = join("\t",@$t);
889 :     print OUT "$string\n";
890 :     }
891 :    
892 : efrank 1.1 }
893 : golsen 1.59 elsif ( $maxpeg > 0 )
894 :     {
895 : parrello 1.75 push @$html, $cgi->h3('No matching protein genes');
896 : golsen 1.59 }
897 : overbeek 1.17
898 : golsen 1.59 if ( ( $maxrole > 0 ) && @$role_index_data )
899 : efrank 1.1 {
900 : parrello 1.75 my $n = @$role_index_data;
901 :     if ($n > $maxrole)
902 :     {
903 :     $msg = "Showing first $maxrole out of $n Roles";
904 :     $#{$role_index_data} = $maxrole - 1;
905 :     }
906 :     else
907 :     {
908 :     $msg = "Showing $n Roles";
909 :     }
910 :    
911 :     if ( $maxpeg > 0 ) { push( @$html, $cgi->hr ) }
912 :     my $col_hdrs = ["Role"];
913 :     my $tab = [ map { &format_role_entry($fig,$cgi,$_) } @$role_index_data ];
914 :     push( @$html, &HTML::make_table($col_hdrs,$tab,$msg) );
915 : efrank 1.1 }
916 : golsen 1.59 elsif ( $maxrole > 0 )
917 :     {
918 : parrello 1.75 push @$html, $cgi->h3('No matching roles');
919 : golsen 1.59 }
920 : parrello 1.115 Trace("Show-indexed-objects method complete.") if T(3);
921 : efrank 1.1 }
922 :    
923 : golsen 1.59
924 : efrank 1.1 sub format_peg_entry {
925 : golsen 1.67 my( $fig, $cgi, $entry ) = @_;
926 : efrank 1.1
927 : parrello 1.115 my($peg,$gs,$aliases,$function,$who,$attribute) = @$entry;
928 : overbeek 1.17
929 : golsen 1.21 $gs =~ s/\s+\d+$//; # Org name comes with taxon_id appended (why?) -- GJO
930 : efrank 1.1
931 : golsen 1.67 my $box = "<input type=checkbox name=checked value=\"$peg\">";
932 : overbeek 1.17 return [ $box, &HTML::fid_link($cgi,$peg), $gs, $aliases, $function, $who ];
933 : efrank 1.1 }
934 :    
935 :     sub format_role_entry {
936 :     my($fig,$cgi,$entry) = @_;
937 :    
938 :     return [&HTML::role_link($cgi,$entry)];
939 :     }
940 :    
941 :     sub run_prot_scan_for_matches {
942 :     my($fig,$cgi,$html,$org,$pat) = @_;
943 :     my($string,$peg,$beg,$end,$user,$col_hdrs,$tab,$i);
944 :    
945 :     my $tmp_pat = "$FIG_Config::temp/tmp$$.pat";
946 :     open(PAT,">$tmp_pat")
947 : parrello 1.75 || die "could not open $tmp_pat";
948 : efrank 1.1 $pat =~ s/[\s\012\015]+/ /g;
949 :     print PAT "$pat\n";
950 :     close(PAT);
951 :     my @out = `$FIG_Config::ext_bin/scan_for_matches -p $tmp_pat < $FIG_Config::organisms/$org/Features/peg/fasta`;
952 :     if (@out < 1)
953 :     {
954 : parrello 1.75 push(@$html,$cgi->h1("Sorry, no hits"));
955 : efrank 1.1 }
956 :     else
957 :     {
958 : parrello 1.75 if (@out > 2000)
959 :     {
960 :     push(@$html,$cgi->h1("truncating to the first 1000 hits"));
961 :     $#out = 1999;
962 :     }
963 :    
964 :     push(@$html,$cgi->pre);
965 :     $user = $cgi->param('user');
966 :     $col_hdrs = ["peg","begin","end","string","function of peg"];
967 :     for ($i=0; ($i < @out); $i += 2)
968 :     {
969 :     if ($out[$i] =~ /^>([^:]+):\[(\d+),(\d+)\]/)
970 :     {
971 :     $peg = $1;
972 :     $beg = $2;
973 :     $end = $3;
974 :     $string = $out[$i+1];
975 :     chomp $string;
976 :     push( @$tab, [ &HTML::fid_link($cgi,$peg,1),
977 :     $beg,
978 :     $end,
979 :     $string,
980 :     scalar $fig->function_of( $peg, $user )
981 :     ]
982 :     );
983 :     }
984 :     }
985 :     push(@$html,&HTML::make_table($col_hdrs,$tab,"Matches"));
986 :     push(@$html,$cgi->end_pre);
987 : efrank 1.1 }
988 :     unlink($tmp_pat);
989 :     }
990 :    
991 : overbeek 1.17 #==============================================================================
992 :     # Scan for matches
993 :     #==============================================================================
994 :    
995 : efrank 1.1 sub run_dna_scan_for_matches {
996 :     my($fig,$cgi,$html,$org,$pat) = @_;
997 :     my($string,$contig,$beg,$end,$col_hdrs,$tab,$i);
998 :    
999 :     my $tmp_pat = "$FIG_Config::temp/tmp$$.pat";
1000 :     open(PAT,">$tmp_pat")
1001 : parrello 1.75 || die "could not open $tmp_pat";
1002 : efrank 1.1 $pat =~ s/[\s\012\015]+/ /g;
1003 :     print PAT "$pat\n";
1004 :     close(PAT);
1005 :     my @out = `cat $FIG_Config::organisms/$org/contigs | $FIG_Config::ext_bin/scan_for_matches -c $tmp_pat`;
1006 :     if (@out < 1)
1007 :     {
1008 : parrello 1.75 push(@$html,$cgi->h1("Sorry, no hits"));
1009 : efrank 1.1 }
1010 :     else
1011 :     {
1012 : parrello 1.75 if (@out > 2000)
1013 :     {
1014 :     push(@$html,$cgi->h1("truncating to the first 1000 hits"));
1015 :     $#out = 1999;
1016 :     }
1017 :    
1018 :     push(@$html,$cgi->pre);
1019 :     $col_hdrs = ["contig","begin","end","string"];
1020 :     for ($i=0; ($i < @out); $i += 2)
1021 :     {
1022 :     if ($out[$i] =~ /^>([^:]+):\[(\d+),(\d+)\]/)
1023 :     {
1024 :     $contig = $1;
1025 :     $beg = $2;
1026 :     $end = $3;
1027 :     $string = $out[$i+1];
1028 :     chomp $string;
1029 :     push(@$tab,[$contig,$beg,$end,$string]);
1030 :     }
1031 :     }
1032 :     push(@$html,&HTML::make_table($col_hdrs,$tab,"Matches"));
1033 :     push(@$html,$cgi->end_pre);
1034 : efrank 1.1 }
1035 :     unlink($tmp_pat);
1036 :     }
1037 :    
1038 : overbeek 1.17 #==============================================================================
1039 :     # BLAST search
1040 :     #==============================================================================
1041 :    
1042 : efrank 1.1 sub run_blast {
1043 : golsen 1.45 my( $fig, $cgi, $html, $org, $tool, $seq ) = @_;
1044 :     my( $query, @out );
1045 : efrank 1.1
1046 : golsen 1.45 my $tmp_seq = "$FIG_Config::temp/run_blast_tmp$$.seq";
1047 : efrank 1.1
1048 : overbeek 1.17 #--------------------------------------------------------------------------
1049 : golsen 1.97 # Does the request require a defined genome? We never check that the
1050 :     # database build works, so the least we can do is some up-front tests.
1051 :     # -- GJO
1052 :     #--------------------------------------------------------------------------
1053 :    
1054 :     if ( $tool !~ /complete genomes/ )
1055 :     {
1056 :     if ( ! $org || ! -d "$FIG_Config::organisms/$org" )
1057 :     {
1058 :     push @$html, $cgi->h2("Sorry, $tool requires selecting a genome." );
1059 :     return;
1060 :     }
1061 :    
1062 :     if ( ( $tool =~ /blastn/ ) || ( $tool =~ /tblastx/ ) )
1063 :     {
1064 :     if ( ! -f "$FIG_Config::organisms/$org/contigs" )
1065 :     {
1066 :     push @$html, $cgi->h2("Sorry, cannot find DNA data for genome $org." );
1067 :     return;
1068 :     }
1069 :     }
1070 :     else
1071 :     {
1072 :     if ( ! -f "$FIG_Config::organisms/$org/Features/peg/fasta" )
1073 :     {
1074 :     push @$html, $cgi->h2("Sorry, cannot find protein data for genome $org." );
1075 :     return;
1076 :     }
1077 :     }
1078 :     }
1079 :    
1080 :     #--------------------------------------------------------------------------
1081 : overbeek 1.17 # Is the request for an id? Get the sequence
1082 :     #--------------------------------------------------------------------------
1083 : golsen 1.97
1084 : golsen 1.91 if ( ( $query ) = $seq =~ /^\s*([a-zA-Z]{2,4}\|\S+)/ )
1085 : efrank 1.1 {
1086 : parrello 1.75 # Replaced $id with $query so that output inherits label -- GJO
1087 : golsen 1.91 # Found ugly fairure to build correct query sequence for
1088 :     # 'blastp against complete genomes'. Can't figure out
1089 :     # why it ever worked with and id -- GJO
1090 :    
1091 : parrello 1.75 $seq = "";
1092 : golsen 1.91 if ( ($tool eq "blastp") || ($tool eq "tblastn")
1093 :     || ($tool eq 'blastp against complete genomes')
1094 :     )
1095 : parrello 1.75 {
1096 :     $seq = $fig->get_translation($query);
1097 : golsen 1.97 my $func = $fig->function_of( $query, $user );
1098 :     $query .= " $func" if $func;
1099 : parrello 1.75 }
1100 :     elsif ($query =~ /^fig/)
1101 :     {
1102 :     my @locs;
1103 :     if ((@locs = $fig->feature_location($query)) && (@locs > 0))
1104 :     {
1105 :     $seq = $fig->dna_seq($fig->genome_of($query),@locs);
1106 :     }
1107 :     }
1108 :     if (! $seq)
1109 :     {
1110 :     push(@$html,$cgi->h1("Sorry, could not get sequence for $query"));
1111 :     return;
1112 :     }
1113 : efrank 1.1 }
1114 : golsen 1.45
1115 : overbeek 1.17 #--------------------------------------------------------------------------
1116 :     # Is it a fasta format? Get the query name
1117 :     #--------------------------------------------------------------------------
1118 : golsen 1.45
1119 :     elsif ( $seq =~ s/^>\s*(\S+[^\n\012\015]*)// ) # more flexible match -- GJO
1120 : efrank 1.1 {
1121 : parrello 1.75 $query = $1;
1122 : efrank 1.1 }
1123 : golsen 1.45
1124 : overbeek 1.17 #--------------------------------------------------------------------------
1125 :     # Take it as plain text
1126 :     #--------------------------------------------------------------------------
1127 : golsen 1.45
1128 : efrank 1.1 else
1129 :     {
1130 : parrello 1.75 $query = "query";
1131 : efrank 1.1 }
1132 : golsen 1.45
1133 :     #
1134 :     # The rest is taken as the sequence
1135 :     #
1136 :    
1137 : golsen 1.23 $seq =~ s/\s+//g;
1138 : golsen 1.45 open( SEQ, ">$tmp_seq" ) || die "run_blast could not open $tmp_seq";
1139 : efrank 1.1 print SEQ ">$query\n$seq\n";
1140 : golsen 1.45 close( SEQ );
1141 : efrank 1.1
1142 :     if (! $ENV{"BLASTMAT"}) { $ENV{"BLASTMAT"} = "$FIG_Config::blastmat" }
1143 : golsen 1.88 my $blast_opt = $cgi->param( 'blast_options' ) || '';
1144 : efrank 1.1
1145 : golsen 1.45 if ( $tool eq "blastp" )
1146 : efrank 1.1 {
1147 : parrello 1.75 my $db = "$FIG_Config::organisms/$org/Features/peg/fasta";
1148 :     &verify_db( $db, "p" );
1149 : golsen 1.88 @out = map { &HTML::set_prot_links($cgi,$_) } execute_blastall( 'blastp', $tmp_seq, $db, $blast_opt );
1150 : efrank 1.1 }
1151 : golsen 1.45
1152 :     elsif ( $tool eq "blastx" )
1153 : efrank 1.1 {
1154 : parrello 1.75 my $db = "$FIG_Config::organisms/$org/Features/peg/fasta";
1155 :     &verify_db( $db, "p" );
1156 : golsen 1.88 @out = map { &HTML::set_prot_links($cgi,$_) } execute_blastall( 'blastx', $tmp_seq, $db, $blast_opt );
1157 : efrank 1.1 }
1158 : golsen 1.45
1159 :     elsif ( $tool eq "blastn" )
1160 : efrank 1.1 {
1161 : parrello 1.75 my $db = "$FIG_Config::organisms/$org/contigs";
1162 :     &verify_db( $db, "n" ); ### fix to get all contigs
1163 : golsen 1.88 @out = execute_blastall( 'blastn', $tmp_seq, $db, "-r 1 -q -1 " . $blast_opt );
1164 : parrello 1.75 push @$html, blast_graphics( $fig, $cgi, $org, \@out, $tool );
1165 : efrank 1.1 }
1166 : golsen 1.45
1167 :     elsif ( $tool eq "tblastn" )
1168 : efrank 1.1 {
1169 : parrello 1.75 my $db = "$FIG_Config::organisms/$org/contigs";
1170 :     &verify_db( $db, "n" ); ### fix to get all contigs
1171 : golsen 1.88 @out = execute_blastall( 'tblastn', $tmp_seq, $db, $blast_opt );
1172 : parrello 1.75 push @$html, blast_graphics( $fig, $cgi, $org, \@out, $tool );
1173 : efrank 1.1 }
1174 : golsen 1.45
1175 :     elsif ( $tool eq 'blastp against complete genomes' ) ### this tool gets nonstandard treatment: RAO
1176 : overbeek 1.30 {
1177 : golsen 1.103 &blast_complete( $fig, $cgi, $html, $tmp_seq, $query, $seq );
1178 : golsen 1.91 unlink( $tmp_seq );
1179 :     return;
1180 : overbeek 1.30 }
1181 : golsen 1.45
1182 : overbeek 1.17 if (@out < 1) # This is really a bigger problem than no hits (GJO)
1183 : efrank 1.1 {
1184 : golsen 1.88 push @$html, $cgi->h1( "Sorry, no blast output" );
1185 : efrank 1.1 }
1186 :     else
1187 :     {
1188 : golsen 1.88 push @$html, $cgi->pre, @out, $cgi->end_pre;
1189 : efrank 1.1 }
1190 : golsen 1.45 unlink( $tmp_seq );
1191 : efrank 1.1 }
1192 :    
1193 : golsen 1.45
1194 : golsen 1.88 # `$blastall -p $prog -i $tmp_seq -d $db $blast_opt`
1195 :     # execute_blastall( $prog, $input_file, $db, $options )
1196 :    
1197 :     sub execute_blastall
1198 :     {
1199 :     my( $prog, $input, $db, $options ) = @_;
1200 :    
1201 :     my $blastall = "$FIG_Config::ext_bin/blastall";
1202 :     my @args = ( '-p', $prog, '-i', $input, '-d', $db, split(/\s+/, $options) );
1203 :    
1204 :     my $bfh;
1205 :     my $pid = open( $bfh, "-|" );
1206 :     if ( $pid == 0 )
1207 :     {
1208 :     exec( $blastall, @args );
1209 :     die join( " ", $blastall, @args, "failed: $!" );
1210 :     }
1211 :    
1212 : golsen 1.91 <$bfh>
1213 : golsen 1.88 }
1214 :    
1215 :    
1216 : golsen 1.91 # Changed to:
1217 :     # Include low complexity filter in blast search.
1218 :     # Remove all but first match to a given database sequence.
1219 :     # Sort by bit-score, not E-value (which becomes equal for all strong matches).
1220 :     # Limit to 1000 matches.
1221 :     # -- GJO
1222 :    
1223 :     sub blast_complete
1224 :     {
1225 : golsen 1.103 my( $fig, $cgi, $html, $seqfile, $query, $seq ) = @_;
1226 : golsen 1.88 my( $genome, @sims );
1227 :    
1228 : overbeek 1.30 @sims = ();
1229 : golsen 1.91 foreach $genome ( $fig->genomes("complete") )
1230 : overbeek 1.30 {
1231 : parrello 1.75 my $db = "$FIG_Config::organisms/$genome/Features/peg/fasta";
1232 :     next if (! -s $db);
1233 : overbeek 1.30
1234 : parrello 1.75 &verify_db($db,"p");
1235 :     my $sim;
1236 : golsen 1.91 my %seen = ();
1237 :     push @sims, map { chomp;
1238 :     $sim = [ split /\t/ ];
1239 :     $sim->[10] =~ s/^e-/1.0e-/;
1240 :     $seen{ $sim->[1] }++ ? () : $sim
1241 :     }
1242 :     execute_blastall( 'blastp', $seqfile, $db, '-m 8 -F T -e 1e-5' );
1243 : overbeek 1.30 }
1244 : golsen 1.91
1245 :     @sims = sort { $b->[11] <=> $a->[11] } @sims;
1246 :     if ( @sims > 1000 ) { @sims = @sims[0 .. 999] }
1247 : golsen 1.103 &format_sims( $fig, $cgi, $html, \@sims, $query, $seq );
1248 : overbeek 1.30 }
1249 :    
1250 : golsen 1.65
1251 :     #------------------------------------------------------------------------------
1252 : golsen 1.97 # Graphically display search results on contigs
1253 : golsen 1.65 #
1254 :     # use FIGjs qw( toolTipScript );
1255 :     # use GenoGraphics qw( render );
1256 :     #------------------------------------------------------------------------------
1257 :     #
1258 : golsen 1.97 # Fields produced by next_blast_hsp:
1259 :     #
1260 :     # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1261 :     # qid qdef qlen sid sdef slen scr e_val p_n p_val n_mat n_id n_pos n_gap dir q1 q2 qseq s1 s2 sseq
1262 : golsen 1.65 #------------------------------------------------------------------------------
1263 :    
1264 :     sub blast_graphics {
1265 :     my ( $fig_or_sprout, $cgi, $genome, $out, $tool ) = @_;
1266 :    
1267 :     my $e_min = 0.1;
1268 :     my $gg = [];
1269 : golsen 1.97 my @html = ();
1270 : golsen 1.65
1271 : golsen 1.97 # Changed to use standalone parsing function, not shell script -- GJO
1272 : golsen 1.65
1273 : golsen 1.97 my $outcopy = [ @$out ];
1274 :     while ( $_ = &gjoparseblast::next_blast_hsp( $outcopy ) )
1275 : golsen 1.65 {
1276 : golsen 1.97 my ( $qid, $qlen, $contig, $slen ) = @$_[0, 2, 3, 5 ];
1277 :     my ( $e_val, $n_mat, $n_id, $q1, $q2, $s1, $s2 ) = @$_[ 7, 10, 11, 15, 16, 18, 19 ];
1278 :     next if $e_val > $e_min;
1279 :     my ( $genes, $min, $max ) = hsp_context( $fig_or_sprout, $cgi, $genome,
1280 :     $e_val, 100 * $n_id / $n_mat,
1281 :     $qid, $q1, $q2, $qlen,
1282 :     $contig, $s1, $s2, $slen
1283 :     );
1284 :     if ($min && $max)
1285 : golsen 1.65 {
1286 : golsen 1.97 push @$gg, [ substr( $contig, 0, 18 ), $min, $max, $genes ];
1287 : golsen 1.65 }
1288 : golsen 1.97 }
1289 : golsen 1.65
1290 : golsen 1.97 # $gene = [ $beg, $end, $shape, $color, $text, $url, $pop-up, $alt_action, $pop-up_title ];
1291 :     # $genes = [ $gene, $gene, ... ];
1292 :     # $map = [ $label, $min_coord, $max_coord, $genes ];
1293 :     # $gg = [ $map, $map, ... ];
1294 :     # render( $gg, $width, $obj_half_heigth, $save, $img_index_number )
1295 :    
1296 :     if ( @$gg )
1297 :     {
1298 :     # print STDERR Dumper( $gg );
1299 :     my $gs = $fig_or_sprout->genus_species( $genome );
1300 :     my $space = "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;";
1301 :     my $legend = "<TABLE>\n"
1302 :     . " <TR>\n"
1303 :     . " <TD>Q = Query sequence$space</TD>\n"
1304 :     . " <TD Bgcolor='#FF0000'>$space</TD><TD>Frame 1 translation$space</TD>\n"
1305 :     . " <TD Bgcolor='#00FF00'>$space</TD><TD>Frame 2 translation$space</TD>\n"
1306 :     . " <TD Bgcolor='#0000FF'>$space</TD><TD>Frame 3 translation$space</TD>\n"
1307 :     . " <TD Bgcolor='#808080'>$space</TD><TD>Untranslated feature</TD>\n"
1308 :     . " </TR>\n"
1309 :     . "</TABLE><P />";
1310 :    
1311 :     push @html, "\n", FIGjs::toolTipScript(), "\n",
1312 :     $cgi->h2( "Results of $tool search of contigs from $gs\n"),
1313 :     $legend,
1314 :     @{ GenoGraphics::render( $gg, 600, 4, 0, 1 ) },
1315 :     $cgi->hr, "\n";
1316 : golsen 1.65 }
1317 :    
1318 :     return @html;
1319 :     }
1320 :    
1321 :    
1322 :     sub hsp_context {
1323 :     my( $fig_or_sprout, $cgi, $genome, $e_val, $pct_id,
1324 :     $qid, $q1, $q2, $qlen,
1325 : parrello 1.75 $contig, $s1, $s2, $slen ) = @_;
1326 : golsen 1.65 my $half_sz = 5000;
1327 :    
1328 :     my( $from, $to, $features, $fid, $beg, $end );
1329 :     my( $link, $lbl, $isprot, $function, $uniprot, $info, $prot_query );
1330 :    
1331 :     my $user = $cgi->param( 'user' ) || "";
1332 :     my $sprout = $cgi->param( 'SPROUT' ) ? '&SPROUT=1' : '';
1333 :    
1334 :     my @genes = ();
1335 :    
1336 :     # Based on the match position of the query, select the context region:
1337 :    
1338 :     ( $from, $to ) = ( $s1 <= $s2 ) ? ( $s1 - $half_sz, $s2 + $half_sz )
1339 :     : ( $s2 - $half_sz, $s1 + $half_sz );
1340 :     $from = 1 if ( $from < 1 );
1341 :     $to = $slen if ( $to > $slen );
1342 :    
1343 :     # Get the genes in the region, and adjust the ends to include whole genes:
1344 :    
1345 :     ( $features, $from, $to ) = genes_in_region( $fig_or_sprout, $cgi, $genome, $contig, $from, $to );
1346 :    
1347 : golsen 1.103 # Fix the end points if features have moved them to exclude query:
1348 :    
1349 :     if ( $s1 < $s2 ) { $from = $s1 if $s1 < $from; $to = $s2 if $s2 > $to }
1350 :     else { $from = $s2 if $s2 < $from; $to = $s1 if $s1 > $to }
1351 : golsen 1.65
1352 :     # Add the other features:
1353 :    
1354 :     foreach $fid ( @$features )
1355 :     {
1356 : parrello 1.75 my $contig1;
1357 :     ( $contig1, $beg, $end ) = boundaries_of( $fig_or_sprout, feature_locationS( $fig_or_sprout, $fid ) );
1358 :     next if $contig1 ne $contig;
1359 :    
1360 :     $link = "";
1361 :     if ( ( $lbl ) = $fid =~ /peg\.(\d+)$/ ) {
1362 : overbeek 1.82 ( $link = $cgi->url(-relative => 1) ) =~ s/index\.cgi/protein.cgi/;
1363 : parrello 1.75 $link .= "?prot=$fid&user=$user$sprout";
1364 :     $isprot = 1;
1365 :     } elsif ( ( $lbl ) = $fid =~ /\.([a-z]+)\.\d+$/ ) {
1366 :     $lbl = uc $lbl;
1367 :     $isprot = 0;
1368 :     } else {
1369 :     $lbl = "";
1370 :     $isprot = 0;
1371 :     }
1372 :    
1373 :     $function = function_ofS( $fig_or_sprout, $fid );
1374 :    
1375 :     $uniprot = join ", ", grep { /^uni\|/ } feature_aliasesL( $fig_or_sprout, $fid);
1376 :    
1377 :     $info = join( '<br />', "<b>Feature:</b> $fid",
1378 :     "<b>Contig:</b> $contig",
1379 :     "<b>Begin:</b> $beg",
1380 :     "<b>End:</b> $end",
1381 :     $function ? "<b>Function:</b> $function" : '',
1382 :     $uniprot ? "<b>Uniprot ID:</b> $uniprot" : ''
1383 :     );
1384 :    
1385 : golsen 1.97 # $gene = [ $beg, $end, $shape, $color, $text, $url, $pop-up, $alt_action, $pop-up_title ];
1386 :    
1387 : parrello 1.75 push @genes, [ feature_graphic( $beg, $end, $isprot ),
1388 :     $lbl, $link, $info,
1389 :     $isprot ? () : ( undef, "Feature information" )
1390 :     ];
1391 : golsen 1.65 }
1392 :    
1393 :     # Draw the query. The subject coordinates are always DNA. If the query
1394 :     # is protein, it is about 3 times shorter than the matching contig DNA.
1395 :     # Splitting the difference, if 1.7 times the query length is still less
1396 :     # than the subject length, we will call it a protein query (and reading
1397 :     # frame in the contig coordinates has meaning). If it is nucleotides,
1398 :     # there is no defined frame.
1399 :    
1400 :     $info = join( '<br />', $qid ne 'query ' ? "<b>Query:</b> $qid" : (),
1401 :     "<b>Length:</b> $qlen",
1402 :     "<b>E-value:</b> $e_val",
1403 :     "<b>% identity:</b> " . sprintf( "%.1f", $pct_id ),
1404 :     "<b>Region of similarity:</b> $q1 &#150; $q2"
1405 :     );
1406 :     $prot_query = ( 1.7 * abs( $q2 - $q1 ) < abs( $s2 - $s1 ) ) ? 1 : 0;
1407 :    
1408 : golsen 1.104 if ( $user && $prot_query )
1409 : golsen 1.97 {
1410 :     $link = $cgi->url(-relative => 1);
1411 :     $link =~ s/index\.cgi/propose_new_peg.cgi/;
1412 :     $link .= "?user=$user&genome=$genome&covering=${contig}_${s1}_${s2}";
1413 :     }
1414 :     else
1415 :     {
1416 :     $link = undef;
1417 :     }
1418 :    
1419 : golsen 1.65 push @genes, [ feature_graphic( $s1, $s2, $prot_query ),
1420 : golsen 1.97 'Q', $link, $info, undef, 'Query and match information'
1421 : golsen 1.65 ];
1422 :    
1423 :     return \@genes, $from, $to;
1424 :     }
1425 :    
1426 :    
1427 :     sub feature_graphic {
1428 :     my ( $beg, $end, $isprot ) = @_;
1429 :     my ( $min, $max, $symb, $color );
1430 :    
1431 :     ( $min, $max, $symb ) = ( $beg <= $end ) ? ( $beg, $end, "rightArrow" )
1432 :     : ( $end, $beg, "leftArrow" );
1433 :    
1434 :     # Color proteins by translation frame
1435 :    
1436 :     $color = $isprot ? qw( blue red green )[ $beg % 3 ] : 'grey';
1437 :    
1438 :     ( $min, $max, $symb, $color );
1439 :     }
1440 :    
1441 :    
1442 :     sub genes_in_region {
1443 :     my( $fig_or_sprout, $cgi, $genome, $contig, $min, $max ) = @_;
1444 :    
1445 :     if ( $cgi->param( 'SPROUT' ) )
1446 :     {
1447 : parrello 1.75 my( $x, $feature_id );
1448 :     my( $feat, $min, $max ) = $fig_or_sprout->genes_in_region( $genome, $contig, $min, $max );
1449 :     my @tmp = sort { ($a->[1] cmp $b->[1]) or
1450 :     (($a->[2]+$a->[3]) <=> ($b->[2]+$b->[3]))
1451 :     }
1452 :     map { $feature_id = $_;
1453 :     $x = feature_locationS( $fig_or_sprout, $feature_id );
1454 :     $x ? [ $feature_id, boundaries_of( $fig_or_sprout, $x )] : ()
1455 :     }
1456 :     @$feat;
1457 :     return ( [map { $_->[0] } @tmp ], $min, $max );
1458 : golsen 1.65 }
1459 :     else
1460 :     {
1461 : parrello 1.75 return $fig_or_sprout->genes_in_region( $genome, $contig, $min, $max );
1462 : golsen 1.65 }
1463 :     }
1464 :    
1465 :    
1466 :     sub feature_locationS {
1467 :     my ( $fig_or_sprout, $peg ) = @_;
1468 :     scalar $fig_or_sprout->feature_location( $peg );
1469 :     }
1470 :    
1471 :    
1472 :     sub boundaries_of {
1473 :     my( $fig_or_sprout, $loc ) = @_;
1474 :     $fig_or_sprout->boundaries_of( $loc );
1475 :     }
1476 :    
1477 :    
1478 :     sub function_ofS {
1479 :     my( $fig_or_sprout, $peg, $user ) = @_;
1480 :     scalar $fig_or_sprout->function_of( $peg, $user );
1481 :     }
1482 :    
1483 :    
1484 :     sub feature_aliasesL {
1485 :     my( $fig_or_sprout, $fid ) = @_;
1486 :     my @tmp = $fig_or_sprout->feature_aliases( $fid );
1487 :     @tmp
1488 :     }
1489 :    
1490 :    
1491 : overbeek 1.30 sub format_sims {
1492 : golsen 1.103 my( $fig, $cgi, $html, $sims, $query, $seq ) = @_;
1493 :     my( $col_hdrs, $table, @ids, $ids, $sim, %seen, $n, $fid );
1494 : overbeek 1.30
1495 :     $col_hdrs = [ "Select up to here",
1496 : parrello 1.75 "Similar sequence",
1497 :     "E-val",
1498 :     "Function",
1499 :     "Organism",
1500 :     "Aliases"
1501 :     ];
1502 : overbeek 1.30
1503 :     $table = [];
1504 :     @ids = ();
1505 : golsen 1.103 $n = 0; # Count reported sequences
1506 :     foreach $sim ( @$sims )
1507 : overbeek 1.30 {
1508 : golsen 1.103 $fid = $sim->[1];
1509 :     next if $seen{ $fid }++; # One hit per sequence
1510 :     next if $fig->is_deleted_fid( $fid ); # Hide deleted sequences
1511 :     my $alii = scalar $fig->feature_aliases( $fid );
1512 :     $alii =~ s/,/, /g;
1513 :     push( @$table, [ $cgi->checkbox( -name => 'list_to',
1514 :     -value => $fid,
1515 :     -override => 1,
1516 :     -checked => 0,
1517 :     -label => ""
1518 :     ),
1519 :     &HTML::fid_link( $cgi, $fid ),
1520 :     [ $sim->[10], "TD NoWrap" ],
1521 :     scalar $fig->function_of( $fid ),
1522 :     $fig->genus_species( $fig->genome_of( $fid ) ),
1523 :     $alii
1524 :     ]
1525 :     );
1526 :     push( @ids, $fid );
1527 :     last if ++$n >= 1000; # Stop after 1000
1528 : overbeek 1.30 }
1529 : golsen 1.103
1530 : overbeek 1.30 $ids = join(",",@ids);
1531 :     my $target = "window$$";
1532 : golsen 1.103 push( @$html, $cgi->start_form( -method => 'post',
1533 : parrello 1.75 -target => $target,
1534 : olson 1.89 -action => $this_script
1535 : parrello 1.75 ),
1536 : golsen 1.103 $cgi->hidden(-name => 'ids', -value => $ids),
1537 :     $cgi->hidden(-name => 'qid', -value => $query),
1538 :     $cgi->hidden(-name => 'qseq', -value => $seq),
1539 :     $cgi->submit('Extract Matched Sequences'),
1540 :     # $cgi->submit('Align Matched Sequences'),
1541 :     &HTML::make_table($col_hdrs,$table,"Best Hits"),
1542 :     $cgi->submit('Extract Matched Sequences'),
1543 :     # $cgi->submit('Align Matched Sequences'),
1544 :     $cgi->end_form
1545 :     );
1546 : overbeek 1.30 }
1547 : overbeek 1.17
1548 : golsen 1.103
1549 : efrank 1.1 sub verify_db {
1550 :     my($db,$type) = @_;
1551 :    
1552 : overbeek 1.17 if ($type =~ /^p/i)
1553 : efrank 1.1 {
1554 : parrello 1.75 if ((! -s "$db.psq") || (-M "$db.psq" > -M $db))
1555 :     {
1556 :     system "$FIG_Config::ext_bin/formatdb -p T -i $db";
1557 :     }
1558 : efrank 1.1 }
1559 :     else
1560 :     {
1561 : parrello 1.75 if ((! -s "$db.nsq") || (-M "$db.nsq" > -M $db))
1562 :     {
1563 :     system "$FIG_Config::ext_bin/formatdb -p F -i $db";
1564 :     }
1565 : efrank 1.1 }
1566 : parrello 1.75 }
1567 : overbeek 1.7
1568 :     sub export_assignments {
1569 :     my($fig,$cgi,$html,$who) = @_;
1570 :     my($genome,$x);
1571 :    
1572 :     my @genomes = map { $_ =~ /\((\d+\.\d+)\)/; $1 } $cgi->param('korgs');
1573 :    
1574 :     if (@genomes == 0)
1575 :     {
1576 : parrello 1.75 @genomes = $fig->genomes;
1577 : overbeek 1.7 }
1578 :    
1579 : overbeek 1.10 my @assignments = $fig->assignments_made(\@genomes,$who,$cgi->param('after_date'));
1580 : overbeek 1.7 if (@assignments == 0)
1581 :     {
1582 : parrello 1.75 push(@$html,$cgi->h1("Sorry, no assignments where made by $who"));
1583 : overbeek 1.7 }
1584 :     else
1585 :     {
1586 : parrello 1.75 my $col_hdrs = ["FIG id", "External ID", "Genus/Species","Assignment"];
1587 :     my $tab = [];
1588 :     my($x,$peg,$func);
1589 :     foreach $x (@assignments)
1590 :     {
1591 :     ( $peg, $func ) = @$x;
1592 :     push( @$tab,[ HTML::set_prot_links( $cgi, $peg ),
1593 :     HTML::set_prot_links( $cgi, ext_id( $fig, $peg ) ),
1594 :     $fig->genus_species($fig->genome_of($peg)),
1595 :     $func
1596 :     ] );
1597 :     }
1598 :    
1599 :     if ($cgi->param('save_assignments'))
1600 :     {
1601 :     my $user = $cgi->param('save_user');
1602 :     if ($user)
1603 :     {
1604 :     &FIG::verify_dir("$FIG_Config::data/Assignments/$user");
1605 :     my $file = &FIG::epoch_to_readable(time) . ":$who:exported_from_local_SEED";
1606 :     if (open(TMP,">$FIG_Config::data/Assignments/$user/$file"))
1607 :     {
1608 :     print TMP join("",map { join("\t",@$_) . "\n" } map { [$_->[0],$_->[3]] } @$tab);
1609 :     close(TMP);
1610 :     }
1611 :     push(@$html,$cgi->h1("Saved Assignment Set $file"));
1612 :     }
1613 :     else
1614 :     {
1615 :     push(@$html,$cgi->h1("You need to specify a user to save an assignment set"));
1616 :     }
1617 :     }
1618 :    
1619 :     if ($cgi->param('tabs'))
1620 :     {
1621 :     print $cgi->header;
1622 :     print "<pre>\n";
1623 :     print join("",map { join("\t",@$_) . "\n" } @$tab);
1624 :     print "</pre>\n";
1625 :     exit;
1626 :     }
1627 :     else
1628 :     {
1629 :     push(@$html,&HTML::make_table($col_hdrs,$tab,"Assignments Made by $who"));
1630 :     }
1631 : overbeek 1.7 }
1632 :     }
1633 :    
1634 :     sub ext_id {
1635 :     my($fig,$peg) = @_;
1636 :    
1637 :     my @mapped = grep { $_ !~ /^fig/ } map { $_->[0] } $fig->mapped_prot_ids($peg);
1638 :     if (@mapped == 0)
1639 :     {
1640 : parrello 1.75 return $peg;
1641 : overbeek 1.7 }
1642 :    
1643 :     my @tmp = ();
1644 :     if ((@tmp = grep { $_ =~ /^sp/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
1645 :     if ((@tmp = grep { $_ =~ /^pir/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
1646 :     if ((@tmp = grep { $_ =~ /^gi/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
1647 :     if ((@tmp = grep { $_ =~ /^tr/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
1648 :     if ((@tmp = grep { $_ =~ /^tn/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
1649 :     if ((@tmp = grep { $_ =~ /^kegg/ } @mapped) && (@tmp > 0)) { return $tmp[0] }
1650 :    
1651 :     return $peg;
1652 :     }
1653 :    
1654 : overbeek 1.28 sub translate_assignments {
1655 :     my($fig,$cgi,$html,$from_func,$to_func) = @_;
1656 :    
1657 : overbeek 1.56 my @funcs = grep { $_ =~ /^\S.*\S$/ } split(/[\012\015]+/,$from_func);
1658 :    
1659 : overbeek 1.28 my $user = $cgi->param('save_user');
1660 :     if ($user)
1661 :     {
1662 :     &FIG::verify_dir("$FIG_Config::data/Assignments/$user");
1663 :     my $file = &FIG::epoch_to_readable(time) . ":$user:translation";
1664 :     if (open(TMP,">$FIG_Config::data/Assignments/$user/$file"))
1665 :     {
1666 : overbeek 1.55 my($peg,$func);
1667 : overbeek 1.28
1668 : overbeek 1.56 foreach $from_func (@funcs)
1669 : overbeek 1.28 {
1670 : overbeek 1.57 my $from_funcQ = quotemeta $from_func;
1671 :    
1672 : overbeek 1.56 foreach $peg ($fig->seqs_with_role($from_func))
1673 : overbeek 1.28 {
1674 : overbeek 1.56 if ($peg =~ /^fig\|/)
1675 : overbeek 1.28 {
1676 : overbeek 1.56 $func = $fig->function_of($peg);
1677 :     if ($func eq $from_func)
1678 :     {
1679 :     print TMP "$peg\t$to_func\n";
1680 :     }
1681 : overbeek 1.73 else
1682 : overbeek 1.57 {
1683 : overbeek 1.73 my @pieces = grep { $_ } split(/(\s+[\/@]\s+)|(\s*;\s+)/,$func);
1684 :     if (@pieces > 1)
1685 :     {
1686 :     my $func1 = join("",map { $_ =~ s/^$from_funcQ$/$to_func/; $_ } @pieces);
1687 :     if ($func ne $func1)
1688 :     {
1689 :     print TMP "$peg\t$func1\n";
1690 :     }
1691 :     }
1692 : overbeek 1.57 }
1693 : overbeek 1.28 }
1694 :     }
1695 :     }
1696 :     close(TMP);
1697 :     }
1698 :     push(@$html,$cgi->h1("Saved Assignment Set $file"));
1699 :     }
1700 :     else
1701 :     {
1702 : parrello 1.75 push(@$html,$cgi->h1("You need to specify a user to save an assignment set"));
1703 : overbeek 1.28 }
1704 :     }
1705 : efrank 1.92
1706 :     sub find_pegs_by_cv1 {
1707 :     my ($fig, $cgi, $html, $user, $pattern, $cv) = @_;
1708 :    
1709 :     # Remember kind of search that got us hear so we can call back
1710 :     # with same kind
1711 :     my $search = "Search";
1712 :     if ($cgi->param('Search genome selected below')) {
1713 :     $search=uri_escape('Search genome selected below');
1714 :     } elsif ( $cgi->param('Search Selected Organisms') ) {
1715 :     $search = uri_escape('Search Selected Organisms');
1716 :     } elsif ( $cgi->param('Find Genes in Org that Might Play the Role') ) {
1717 :     $search = uri_escape('Find Genes in Org that Might Play the Role');
1718 :     }
1719 :    
1720 :     my $search_results = $fig->search_cv_file($cv, $pattern);
1721 :    
1722 :     my $find_col_hdrs = ["Find","Vocab. Name","ID; Term"];
1723 :     my $find_table_rows;
1724 :     my $counter = 0;
1725 :     for my $r (@$search_results)
1726 :     {
1727 :     my @temp = split("\t",$r);
1728 :     my $row = [];
1729 :     my $id= $temp[1];
1730 :     my $term = $temp[2];
1731 :     my $id_and_term = $id."; ".$term;
1732 :     my $pattern=uri_escape("$id; $term");
1733 :    
1734 :     my $link = "index.cgi?pattern=$pattern&Search=1&user=$user";
1735 :     my $cb = "<a href=$link>Find PEGs</a>";
1736 :    
1737 :     #feh my $cb = $cgi->submit(-name=>'$search', -value=>'Find PEGs');
1738 :     #my $cb_value = $cv."split_here".$id."; ".$term;
1739 :     #my $cb ="<input type=checkbox name=find_checked_$counter value='$cb_value'>" ;
1740 :     push(@$row,$cb);
1741 :     push(@$row,$cv);
1742 :     push(@$row,$id_and_term);
1743 :     push(@$find_table_rows,$row);
1744 :     $counter = $counter + 1;
1745 :     }
1746 :    
1747 :     my $find_terms_button="";
1748 :     if ($counter > 0) {
1749 :     $find_terms_button= $cgi->submit(-name=>'$search', -value=>'$search');
1750 :     }
1751 :    
1752 :     # build the page
1753 :     push @$html,
1754 :     $cgi->start_form(),
1755 :     $cgi->hidden(-name=>'user', -value=>'$user'),
1756 :     $cgi->br,
1757 :     "<h2>Search for PEGs annotated with Contrlled Vocabulary Terms</h2>",
1758 :     $cgi->hr,
1759 :     "<h4>Terms Matching Your Criteria </h4>\n",
1760 :     $cgi->br,
1761 :     &HTML::make_table($find_col_hdrs,$find_table_rows),
1762 :     $cgi->br,
1763 :     $find_terms_button,
1764 :     $cgi->end_form;
1765 :    
1766 :     return $html;
1767 :     }
1768 :    
1769 :     sub find_pegs_by_cv {
1770 :     my ($fig, $cgi, $html, $user, $pattern, $cv) = @_;
1771 :    
1772 :     # Remember kind of search that got us hear so we can call back
1773 :     # with same kind (not working so force to simple Search)
1774 :    
1775 :     my $search = "Search";
1776 :    
1777 :     #if ($cgi->param('Search genome selected below')) {
1778 :     # $search='Search genome selected below';
1779 :     #} elsif ( $cgi->param('Search Selected Organisms') ) {
1780 :     # $search = 'Search Selected Organisms';
1781 :     #} elsif ( $cgi->param('Find Genes in Org that Might Play the Role') ) {
1782 :     # $search = 'Find Genes in Org that Might Play the Role';
1783 :     #}
1784 :    
1785 :     my $search_results = $fig->search_cv_file($cv, $pattern);
1786 :    
1787 :     my $find_col_hdrs = ["Find","Vocab. Name","ID; Term"];
1788 :     my @patterns=();
1789 :     for my $r (@$search_results)
1790 :     {
1791 :     my @temp = split("\t",$r);
1792 :     my $id= $temp[1];
1793 :     my $term = $temp[2];
1794 :     my $pattern="$id; $term";
1795 :    
1796 :     push(@patterns,$pattern);
1797 :     }
1798 :    
1799 :     my @pattern_radio;
1800 :     if ($#patterns + 1) {
1801 :     @pattern_radio = $cgi->radio_group( -name => 'pattern',
1802 :     -values => [ @patterns ]
1803 :     );
1804 :     } else {
1805 :     @pattern_radio = ("Nothing found");
1806 :     }
1807 :    
1808 :     my $find_terms_button= $cgi->submit(-name=>"Search", -value=>"Search");
1809 :    
1810 :     # build the page
1811 :     push @$html,
1812 :     $cgi->start_form(),
1813 :     $cgi->hidden(-name=>'user', -value=>'$user'),
1814 :     $cgi->br,
1815 :     "<h2>Search for PEGs annotated with Contrlled Vocabulary Terms</h2>",
1816 :     $cgi->hr,
1817 :     "<h4>$cv Terms Matching Your Criteria </h4>\n",
1818 :     $cgi->br,
1819 :     $find_terms_button,
1820 :     $cgi->br,
1821 :     $cgi->br,
1822 :     join( "<br>", @pattern_radio),
1823 :     # &HTML::make_table($find_col_hdrs,$find_table_rows),
1824 :     $cgi->br,
1825 :     $find_terms_button,
1826 :     $cgi->end_form;
1827 :    
1828 :     return $html;
1829 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3