[Bio] / FigWebServices / families_on_tree.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/families_on_tree.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download)

1 : overbeek 1.1 ########################################################################
2 :     use CGI;
3 :    
4 :    
5 :     if (-f "$FIG_Config::data/Global/why_down")
6 :     {
7 :     local $/;
8 :     open my $fh, "<$FIG_Config::data/Global/why_down";
9 :     my $down_msg = <$fh>;
10 :    
11 :     print CGI::header();
12 :     print CGI::head(CGI::title("SEED Server down"));
13 :     print CGI::start_body();
14 :     print CGI::h1("SEED Server down");
15 :     print CGI::p("The seed server is not currently running:");
16 :     print CGI::pre($down_msg);
17 :     print CGI::end_body();
18 :     exit;
19 :     }
20 :    
21 :     if ($FIG_Config::readonly)
22 :     {
23 :     CGI::param("user", undef);
24 :     }
25 :     ########################################################################
26 :     use CGI;
27 :    
28 :    
29 :     if (-f "$FIG_Config::data/Global/why_down")
30 :     {
31 :     local $/;
32 :     open my $fh, "<$FIG_Config::data/Global/why_down";
33 :     my $down_msg = <$fh>;
34 :    
35 :     print CGI::header();
36 :     print CGI::head(CGI::title("SEED Server down"));
37 :     print CGI::start_body();
38 :     print CGI::h1("SEED Server down");
39 :     print CGI::p("The seed server is not currently running:");
40 :     print CGI::pre($down_msg);
41 :     print CGI::end_body();
42 :     exit;
43 :     }
44 :    
45 :     if ($FIG_Config::readonly)
46 :     {
47 :     CGI::param("user", undef);
48 :     }
49 :     ########################################################################
50 :     # -*- perl -*-
51 :     #
52 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
53 :     # for Interpretations of Genomes. All Rights Reserved.
54 :     #
55 :     # This file is part of the SEED Toolkit.
56 :     #
57 :     # The SEED Toolkit is free software. You can redistribute
58 :     # it and/or modify it under the terms of the SEED Toolkit
59 :     # Public License.
60 :     #
61 :     # You should have received a copy of the SEED Toolkit Public License
62 :     # along with this program; if not write to the University of Chicago
63 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
64 :     # Genomes at veronika@thefig.info or download a copy from
65 :     # http://www.theseed.org/LICENSE.TXT.
66 :     #
67 :    
68 :     use DB_File;
69 :     use URI::Escape; # uri_escape
70 :     use gjoseqlib;
71 :     use HTML;
72 :     use strict;
73 :     use CGI;
74 :     my $cgi = new CGI;
75 :     use SeedEnv;
76 :     use tree_utilities;
77 :    
78 :     if (0) {
79 :     print $cgi->header;
80 :     my @params = $cgi->param;
81 :     print "<pre>\n";
82 :     foreach $_ (@params) {
83 :     print "$_\t:",join(",",$cgi->param($_)),":\n";
84 :     }
85 :     exit;
86 :     }
87 :    
88 :     my $html = [];
89 :    
90 :     my $node = $cgi->param('node');
91 :     my $family = $cgi->param('family');
92 :     my $ali = $cgi->param('alignment');
93 :     my $tree = $cgi->param('tree');
94 :     my $request = $cgi->param('request');
95 :     my $keywords = $cgi->param('keywords');
96 :     my $function = $cgi->param('function');
97 :     my $dataD = $cgi->param('dataD');
98 :     my $dataDir = "/homes/overbeek/Ross/MakeCS/Data/CS";
99 :     my $dataDF = "$dataDir/$dataD";
100 :    
101 :     if ($request eq "show_otus")
102 :     {
103 :     &show_otus($cgi,$dataDir);
104 :     exit;
105 :     }
106 :     elsif (($request eq "show_options_for_otu") && $dataD)
107 :     {
108 :     &show_options_for_otu($cgi,$dataD);
109 :     exit;
110 :     }
111 :    
112 :     if (($request eq "show_virulence_functions") && (-s "$dataDF/virulence.functions"))
113 :     {
114 :     &show_virulence_functions($cgi,$dataDF,$html);
115 :     }
116 :     elsif (($request eq 'show_indexed_funcs') && $keywords)
117 :     {
118 :     &show_indexed_funcs($cgi,$dataDF,$html,$keywords);
119 :     }
120 :     elsif (($request eq "show_func") && $function)
121 :     {
122 :     $function =~ s/^\s+//;
123 :     $function =~ s/\s+$//;
124 :     &show_func($cgi,$dataDF,$html,$function);
125 :     }
126 :     elsif (($request eq "show_ali_or_occurs_tree") && $ali)
127 :     {
128 :     &show_ali($cgi);
129 :     exit;
130 :     }
131 :     elsif (($request eq "show_ali_or_occurs_tree") && $tree)
132 :     {
133 :     &show_occurs_tree($cgi,$dataDF);
134 :     exit;
135 :     }
136 :     elsif (($request eq "show_family_pegs") && $family)
137 :     {
138 :     &show_family1($cgi,$dataDF,$html,$family);
139 :     }
140 :     elsif (($request eq "show_family_tree") && $family)
141 :     {
142 :     &show_family2($cgi,$dataDF,$html,$family);
143 :     }
144 :     elsif (($request eq "show_node") && $node)
145 :     {
146 :     &show_changes($cgi,$dataDF,$html,$node);
147 :     }
148 :     elsif ($request eq "show_otu_tree")
149 :     {
150 :     &show_otu_tree($cgi,$dataDF,$html,'families');
151 :     }
152 :     elsif ($request eq "show_adjacency")
153 :     {
154 :     &show_otu_tree($cgi,$dataDF,$html,'adjacency');
155 :     }
156 :     elsif ($request eq "show_clusters")
157 :     {
158 :     &show_clusters($cgi,$dataDF,$html);
159 :     }
160 :     elsif ($request eq "show_kovbassa")
161 :     {
162 :     &show_kovbassa_signatures($cgi,$dataDF,$html);
163 :     }
164 :     elsif ($request eq "compute_kovbassa_sigs")
165 :     {
166 :     &compute_kovbassa_signatures($cgi,$dataDF,$html);
167 :     }
168 :     else
169 :     {
170 :     push(@$html,"<h1>Invalid request</h1>");
171 :     }
172 :     &HTML::show_page($cgi,$html);
173 :     exit;
174 :    
175 :     sub show_otu_tree {
176 :     my($cgi,$dataDF,$html,$type) = @_;
177 :    
178 :     $dataDF =~ /([^\/]+)$/;
179 :     my $dataD = $1;
180 :     my @tree;
181 :     my @tmp = `cat $dataDF/readable.tree`;
182 :     foreach $_ (@tmp)
183 :     {
184 :     if ($_ =~ /^(.*\+ )(n\d+)$/)
185 :     {
186 :     my $start = $1;
187 :     my $node = $2;
188 :     my $link = &show_node_link($dataD,$node,$type);
189 :     push(@tree,"$start$link\n");
190 :     }
191 :     elsif ($_ =~ /^(.*\- )(\d+\.\d+)(:.*)$/)
192 :     {
193 :     my $start = $1;
194 :     my $node = $2;
195 :     my $end = $3;
196 :     push(@tree,"$start",&show_node_link($dataD,$node,$type),"$end\n");
197 :     }
198 :     else
199 :     {
200 :     push(@tree,$_);
201 :     }
202 :     }
203 :    
204 :     my $tree = join("",@tree);
205 :     push(@$html,"<pre>\n$tree\n</pre>\n");
206 :     }
207 :    
208 :     sub show_family1 {
209 :     my($cgi,$dataDF,$html,$family) = @_;
210 :    
211 :     $dataDF =~ /([^\/]+)$/;
212 :     my $dataD = $1;
213 :    
214 :     my $func;
215 :     foreach $_ (`cut -f1,2 $dataDF/families.all`)
216 :     {
217 :     if (($_ =~ /^(\d+)\t(\S[^\t]*\S)/) && ($1 == $family)) { $func = $2 }
218 :     }
219 :     my %genome_names = map { ($_ =~ /^(\S+)\t(\S.*\S)/) ? ($1 => $2) : () } `cat $dataDF/genome.names`;
220 :     my $col_hdrs = ['','Genome','Genome Name','Peg'];
221 :     my @tuples = map { my $tuple = $_; $tuple->[3] = &peg_link($tuple->[3]); $tuple }
222 :     sort { $a->[1] cmp $b->[1] }
223 :     map { (($_ =~ /^(\d+)\t([^\t]*)\t(fig\|(\d+\.\d+)\.peg\.\d+)/) && ($1 == $family)) ?
224 :     [$cgi->checkbox(-name => "check.$3",-checked => 1,-label => ''),
225 :     $4,
226 :     $genome_names{$4},
227 :     $3] : () }
228 :     `cut -f1,2,4 $dataDF/families.all`;
229 : overbeek 1.2 push(@$html,$cgi->start_form(-action => "./families_on_tree.cgi"));
230 : overbeek 1.1 push(@$html,"<br><br>\n",&HTML::make_table($col_hdrs,\@tuples,"Distribution of Family $family: $func"),$cgi->hr,"\n");
231 :     push(@$html,"<input type=hidden name=dataD value=$dataD>\n");
232 :     push(@$html,"<input type=hidden name=request value=show_ali_or_occurs_tree>\n");
233 :     push(@$html,$cgi->submit('alignment'),
234 :     $cgi->checkbox(-name => "dna",-checked => 0,-label => 'DNA'),
235 :     "<br><br>",
236 :     $cgi->submit('tree'),
237 :     $cgi->end_form());
238 :     }
239 :    
240 :     sub show_family2 {
241 :     my($cgi,$dataDF,$html,$family) = @_;
242 :     # if union is specified, you want to treat all families with the same function
243 :     # as "present"
244 :     my $union = $cgi->param('union');
245 :     my $func; # used only if union is requested
246 :     my @fam_func_peg_tuples = map { chomp; [split(/\t/,$_)] } `cut -f1,2,4 $dataDF/families.all`;
247 :     if ($union) {
248 :     my @tmp = grep { $_->[0] == $family } @fam_func_peg_tuples;
249 :     $func = $tmp[0]->[1];
250 :     }
251 :    
252 :     $dataDF =~ /([^\/]+)$/;
253 :     my $dataD = $1;
254 :    
255 :     push(@$html,"<br><hr>",&show_fam_table_link($dataDF,$family),"<br><br>");
256 :     my @tuples = grep { $union ? ($_->[1] eq $func) : ($_->[0] == $family) }
257 :     @fam_func_peg_tuples;
258 :     my $func = (@tuples > 0) ? $tuples[0]->[1] : 'hypothetical protein';
259 :     my %has = map { ($_->[2] =~ /fig\|(\d+\.\d+)/) ? ($1 => 1) : () } @tuples;
260 :     my @tree;
261 :     my %node_vals;
262 :     if (! -s "$dataDF/fams.on.tree.db")
263 :     {
264 :     %node_vals = map { (($_ =~ /^(\S+)\t(n\d+)\t(\S+)/) && ($family eq $1)) ? ($2 => $3) : () } `cat $dataDF/families.on.tree`;
265 :     }
266 :     else
267 :     {
268 :     if ($dataD ne "bacteria") { die "only bacteria have a db"; }
269 :     my $bacteriaD = "/homes/overbeek/Ross/MakeCS/Data/CS/bacteria";
270 :     my $fams_on_tree_db = "$bacteriaD/fams.on.tree.db";
271 :     my %fams_on_tree;
272 :     tie %fams_on_tree,"DB_File", $fams_on_tree_db, O_RDWR, 0666, $DB_HASH
273 :     or die "Cannot open file '$fams_on_tree_db': $!\n";
274 :     my $node_vals = $fams_on_tree{$family};
275 :     %node_vals = map { $_ =~ s/://g; ($_ =~ /^(\S+),(\S+)/) ? ($1 => $2) : () } split("::",$node_vals);
276 :     }
277 :    
278 :     my @tmp = `cat $dataDF/readable.tree`;
279 :     foreach $_ (@tmp)
280 :     {
281 :     if ($_ =~ /^(.*\- )(\d+\.\d+)(:.*)$/)
282 :     {
283 :     my $start = $1;
284 :     my $genome = $2;
285 :     my $end = $3;
286 :     if ($has{$genome})
287 :     {
288 :     push(@tree,"$start<b>$genome$end</b>\n");
289 :     }
290 :     else
291 :     {
292 :     push(@tree,"$start$genome$end\n");
293 :     }
294 :     }
295 :     elsif (($_ =~ /^(.*\+ )(n\d+)$/) && (! $union))
296 :     {
297 :     my $start = $1;
298 :     my $node = $2;
299 :     my $status = $node_vals{$node};
300 :     my $to_show = $node . ":$status";
301 :     push(@tree,"$start$to_show\n");
302 :     }
303 :     else
304 :     {
305 :     push(@tree,$_);
306 :     }
307 :     }
308 :     my $tree = join("",@tree);
309 :     push(@$html,"<br><h2>$func</h2><br><pre>\n$tree\n</pre>\n");
310 :     }
311 :    
312 :     sub show_changes {
313 :     my($cgi,$dataDF,$html,$node) = @_;
314 :    
315 :     my $type = $cgi->param('type');
316 :     if ($type eq 'families')
317 :     {
318 :     &show_changes_families($cgi,$dataDF,$html,$node);
319 :     }
320 :     else
321 :     {
322 :     &show_changes_adjacency($cgi,$dataDF,$html,$node);
323 :     }
324 :     }
325 :    
326 :     sub show_changes_adjacency {
327 :     my($cgi,$dataDF,$html,$node) = @_;
328 :     $dataDF =~ /([^\/]+)$/;
329 :     my $dataD = $1;
330 :     my $col_hdrs = ['Family','Function','Ancestral','New','Compare'];
331 :     my @events = grep { $node eq $_->[1] }
332 :     map { ($_ =~ /(\S+)\t(\S+)\t(\d+):\S+\t(\d+):\S+\t(\d+)/) ? [$1,$2,$3,$4,$5] : () }
333 :     `cat $dataDF/placed.events`;
334 :     my %families = map { ($_->[2] => [$_->[3],$_->[4]])} @events;
335 :     my %pegs_needed = map { (($_->[2] => 1), ($_->[3] => 1),($_->[4] => 1)) } @events;
336 :     my %fam_peg = map { my $x;
337 :     (($_ =~ /^(\d+)\t\S+\t(\d+)\t\S+\t\S+\t(\S+)\t(\S+)/) &&
338 :     ($x = $families{$1}) && (($x->[0] eq $2) || ($x->[1] eq $2))) ? ("$1,$2" => $3) : () }
339 :     `cat $dataDF/adjacency.of.unique`;
340 :     my %peg_to_func = map { (($_ =~ /^([^t]+)\t([^\t]*)\t(\S+)/) && $pegs_needed{$1}) ? ($3 => $2) : () } `cut -f1,2,4 $dataDF/families.all`;
341 :     my @rows;
342 :     my $ancestor;
343 :     foreach my $event (@events)
344 :     {
345 :     my($anc,$node,$fam,$fam1,$fam2) = @$event;
346 :     $ancestor = $anc;
347 :     my $peg1 = $fam_peg{"$fam,$fam1"};
348 :     my $peg2 = $fam_peg{"$fam,$fam2"};
349 :     my $func = $peg_to_func{$peg1};
350 :     if ($peg1 && $peg2 && $func)
351 :     {
352 :     push(@rows,[&show_fam_table_link($dataDF,$fam),
353 :     $func,
354 :     &peg_link($peg1),
355 :     &peg_link($peg2),
356 :     &compare_link([$peg1,$peg2])]);
357 :     }
358 :     }
359 :     push(@$html,&HTML::make_table($col_hdrs,\@rows,"Changes in Adjacency from $ancestor"));
360 :     }
361 :    
362 :    
363 :     sub show_changes_families {
364 :     my($cgi,$dataDF,$html,$node) = @_;
365 :    
366 :     $dataDF =~ /([^\/]+)$/;
367 :     my $dataD = $1;
368 :    
369 :     my %func = map { ($_ =~ /^(\d+)\t(\S[^\t]*\S)/) ? ($1 => $2) : () } `cut -f1,2 $dataDF/families.all`;
370 :     my $col_hdrs = ['Show Where','Show PEGs','Family','Function','Clusters','Coupling'];
371 :     my @tmp = grep { (($_ =~ /^\S+\t(\S+)/) && ($1 eq $node)) }
372 :     `cat /$dataDF/where.shifts.occurred`;
373 :     my @tabG = sort { ($a->[4] cmp $b->[4]) or ($a->[3] <=> $b->[3]) }
374 :     map { ($_ =~ /^(\S+)\t\S+\t(\S+)\t0\t1/) ? [&show_fam_links($dataDF,$2),$1,$2,$func{$2}] : () }
375 :     @tmp;
376 :     # tabG entries are [linkT,linkP,ancestor,fam,func]
377 :    
378 :     # try to pick up the ancestor node from the first entry in @tabG
379 :     # If you cannot get it, try to take it from @tabL
380 :     my $anc = (@tabG > 0) ? $tabG[0]->[-3] : undef;
381 :     foreach $_ (@tabG) { splice(@$_,2,1) } ### get rid of ancestor
382 :     ## tabG entries are [linkT,linkP,fam,func]
383 :    
384 :     my @tabL = sort { ($a->[4] cmp $b->[4]) or ($a->[3] <=> $b->[3]) }
385 :     map { ($_ =~ /^(\S+)\t\S+\t(\S+)\t1\t0/) ? [&show_fam_links($dataDF,$2),$1,$2,$func{$2}] : () }
386 :     @tmp;
387 :     if (! $anc)
388 :     {
389 :     $anc = (@tabL > 0) ? $tabL[0]->[-3] : '';
390 :     }
391 :     foreach $_ (@tabL) { splice(@$_,2,1) } ### get rid of ancestor
392 :    
393 :     ## @tabG and @tabL are of the form [link-to-tree,link-to-peg-display,family,function]]
394 :     ## we now add coupling data.
395 :    
396 :     my $with_couplingL = &build_table(\@tabL,$dataDF);
397 :     my $with_couplingG = &build_table(\@tabG,$dataDF);
398 :    
399 :     push(@$html,&HTML::make_table($col_hdrs,$with_couplingG,"Families Gained from Ancestor $anc"),$cgi->hr,"\n");
400 :     push(@$html,&HTML::make_table($col_hdrs,$with_couplingL,"Families Lost from Ancestor $anc"),$cgi->hr,"\n");
401 :     }
402 :    
403 :     sub coupling_data {
404 :     my($dataDF,$famsH) = @_;
405 :    
406 :     my $coupled = {};
407 :     foreach $_ (`cat $dataDF/coupled.families`)
408 :     {
409 :     if (($_ =~ /(\S+)\t(\S+)\t(\d+)/) && $famsH->{$1} && $famsH->{$2} && ($1 ne $2))
410 :     {
411 :     push(@{$coupled->{$1}},$2);
412 :     }
413 :     }
414 :     return $coupled;
415 :     }
416 :    
417 :     sub cluster_link_and_cluster_html {
418 :     my($family,$coupledH,$fam_to_func,$dataD) = @_;
419 :    
420 :     my $cluster_link = '';
421 :     my $coupled = $coupledH->{$family};
422 :     my $coupled_html = "";
423 :     if ($coupled && (@$coupled > 0))
424 :     {
425 :     $cluster_link = &show_clusters_link($dataD,$family,$coupled);
426 :     $coupled_html = join("<br>",map { &show_fam_table_link($dataD,$_) . '(' .
427 :     &show_func_link($dataD,$fam_to_func->{$_}) . ')'
428 :     } @$coupled);
429 :     }
430 :     return ($cluster_link,$coupled_html);
431 :     }
432 :    
433 :     sub build_table {
434 :     my($tab,$dataDF) = @_;
435 :     $dataDF =~ /([^\/]+)$/;
436 :     my $dataD = $1;
437 :    
438 :     my %famH = map { ($_->[-2] => 1) } @$tab;
439 :     my %fam_to_func = map { ($_->[2] => $_->[3]) } @$tab;
440 :     my $coupledH = &coupling_data($dataDF,\%famH);
441 :     my @with_coupling;
442 :     foreach my $tuple (@$tab)
443 :     {
444 :     my($link1,$link2,$family,$function) = @$tuple;
445 :     $tuple->[3] = &show_func_link($dataD,$function);
446 :     my($cluster_link,$coupled_html) = &cluster_link_and_cluster_html($family,$coupledH,\%fam_to_func,$dataD);
447 :     $tuple->[4] = $cluster_link;
448 :     $tuple->[5] = $coupled_html;
449 :     push(@with_coupling,$tuple);
450 :     }
451 :     return \@with_coupling;
452 :     }
453 :    
454 :     sub show_fam_links {
455 :     my($dataDF,$fam) = @_;
456 :    
457 :     $dataDF =~ /([^\/]+)$/;
458 :     my $dataD = $1;
459 :    
460 : overbeek 1.2 return ("<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_family_tree&family=$fam&dataD=$dataD>tree</a>",
461 :     "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_family_pegs&family=$fam&pegs=1&dataD=$dataD>PEGs:$fam</a>");
462 : overbeek 1.1 }
463 :    
464 :     sub show_fam_table_link {
465 :     my($dataDF,$fam) = @_;
466 :    
467 :     $dataDF =~ /([^\/]+)$/;
468 :     my $dataD = $1;
469 :    
470 : overbeek 1.2 return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?family=$fam&pegs=1&request=show_family_pegs&dataD=$dataD>$fam: PEGs</a>";
471 : overbeek 1.1 }
472 :    
473 :     sub show_func {
474 :     my($cgi,$dataDF,$html,$function) = @_;
475 :     $dataDF =~ /([^\/]+)$/;
476 :     my $dataD = $1;
477 :    
478 :     my %counts;
479 :     my %fams = map { my $x;
480 :     if (($_ =~ /^(\d+)\t(\S[^\t]*\S)/) && ($2 eq $function))
481 :     {
482 :     $counts{$1}++;
483 :     ($1 => 1);
484 :     }
485 :     else
486 :     {
487 :     ()
488 :     }
489 :     } `cat $dataDF/families.all`;
490 :     my $col_hdrs = ['Family','PEGs in Family','Distribution on Tree','Size'];
491 :     my @tab = map { [$_,
492 :     &show_fam_table_link($dataDF,$_),
493 :     &show_fam_on_tree_link($dataDF,$_),
494 :     $counts{$_}
495 :     ] } sort { $a <=> $b } keys(%fams);
496 :     push(@$html,"<h1>$function</h1>", &HTML::make_table($col_hdrs,\@tab,"Families with Function"),"<hr>");
497 :    
498 :     my @fams = keys(%fams);
499 :     if (@fams > 1)
500 :     {
501 :     push(@$html,"<br>",&show_fam_on_tree_link($dataDF,$fams[0],"union")," with union of families<br><br>");
502 :     }
503 :     push(@$html,"<hr>");
504 :    
505 :     my @gained = map { (($_ =~ /^(\S+)\t(\S+)\t(\d+)\t0\t1/) && $fams{$3}) ?
506 :     [$1,&show_node_link($dataD,$2,'families'),&show_fam_table_link($dataDF,$3)] : () }
507 :     `cat $dataDF/where.shifts.occurred`;
508 :     push(@$html,&HTML::make_table(['Ancestor','Descendant','Family'],\@gained,"Where $function was GAINED"));
509 :     push(@$html,"<hr><br>");
510 :     my @lost = map { (($_ =~ /^(\S+)\t(\S+)\t(\d+)\t1\t0/) && $fams{$3}) ?
511 :     [$1,&show_node_link($dataD,$2,'families'),&show_fam_table_link($dataDF,$3)] : () }
512 :     `cat $dataDF/where.shifts.occurred`;
513 :     push(@$html,&HTML::make_table(['Ancestor','Descendant','Family'],\@lost,"Where $function was LOST"));
514 :     }
515 :    
516 :     sub show_indexed_funcs {
517 :     my($cgi,$dataDF,$html,$keywords) = @_;
518 :    
519 :     $dataDF =~ /([^\/]+)$/;
520 :     my $dataD = $1;
521 :    
522 :     my $functions_in_fams = &functions_in_at_least_one_family($dataDF);
523 :     # $keywords = "$dataD " . $keywords; ### tell the user to add it,if necessary
524 :    
525 :     my %funcs_to_show;
526 :     foreach my $func (`svr_sphinx_indexing -k \'$keywords\' | cut -f1 | svr_function_of | cut -f2`)
527 :     {
528 :     chomp $func;
529 :     if ($functions_in_fams->{$func})
530 :     {
531 :     $funcs_to_show{$func}++;
532 :     }
533 :     }
534 :     my @funcs = sort { $funcs_to_show{$b} <=> $funcs_to_show{$a} } keys(%funcs_to_show);
535 :     if (@funcs == 0)
536 :     {
537 :     push(@$html,"<h1>Sorry, no functions matched</h1>\n");
538 :     }
539 :     else
540 :     {
541 :     my @links = map { [&show_func_link($dataD,$_)] } @funcs;
542 :     push(@$html,&HTML::make_table(['Possible Functions'],\@links,"Possible functions - Select to find nodes where shifts occurred"));
543 :     }
544 :     }
545 :    
546 :     sub show_virulence_functions {
547 :     my($cgi,$dataDF,$html) = @_;
548 :    
549 :     $dataDF =~ /([^\/]+)$/;
550 :     my $dataD = $1;
551 :    
552 :     my $functions_in_fams = &functions_in_at_least_one_family($dataDF);
553 :     my @virulence_functions = map { chomp; $functions_in_fams->{$_} ? $_ : () } `cat $dataDF/virulence.functions`;
554 :     my @links = map { [&show_func_link($dataD,$_)] } sort @virulence_functions;
555 :     push(@$html,&HTML::make_table(['Function Sometimes Associated with Virulence'],
556 :     \@links,
557 :     'Functions Known to Be Associated with Virulence in Some Organisms'));
558 :     }
559 :     sub functions_in_at_least_one_family {
560 :     my($dataDF) = @_;
561 :    
562 :     my %functions_in_fams = map { chomp; ($_ => 1) } `cut -f2 $dataDF/families.all`;
563 :     return \%functions_in_fams;
564 :     }
565 :    
566 :     sub subsystems_of {
567 :     my($fig,$reaction_to_roles,$reaction) = @_;
568 :    
569 :     my $roles_and_pegs = $reaction_to_roles->{$reaction};
570 :     my @roles = map { my $x = $_; $x =~ s/^[^:]+://; $x } @{$reaction_to_roles->{$reaction}};
571 :     my $printable_roles = "";
572 :     if (@roles > 0)
573 :     {
574 :     my %tmp = map { $_ =~ /^([^:]+):(\S.*\S)/; ($2 => $1) } @$roles_and_pegs;
575 :     my @tmp = map { &peg_link($tmp{$_}) . "<br>" . $_ } sort keys(%tmp);
576 :     $printable_roles = join(",",@tmp);
577 :     }
578 :     my %subsys;
579 :     foreach my $role (@roles)
580 :     {
581 :     foreach my $s ($fig->role_to_subsystems($role))
582 :     {
583 :     $subsys{$s} = 1;
584 :     }
585 :     }
586 :     return join("\t",sort keys(%subsys)) . "<br>" . $printable_roles;
587 :     }
588 :    
589 :     sub show_ali {
590 :     my($cgi) = @_;
591 :     my @checked = map { ($_ =~ s/^check.//) ? $_ : () } $cgi->param();
592 :     if (@checked > 1)
593 :     {
594 :     my %checked = map { $_ => 1 } @checked;
595 :     my $dataD = $cgi->param('dataD');
596 :     my $dataDir = "/homes/overbeek/Ross/MakeCS/Data/CS";
597 :     my $dataDF = "$dataDir/$dataD";
598 :    
599 :     my $tmp_seqs_in = "$FIG_Config::temp/tmp$$.seqs.in";
600 :     my $tmp_seqs_ali = "$FIG_Config::temp/tmp$$.ali";
601 :     open(SEQS,">$tmp_seqs_in") || die "could not open $tmp_seqs_in";
602 :     my $dna = $cgi->param('dna');
603 :     opendir(GENOMES,"$dataDF/Seqs") || die "could not open $dataDF/Seqs";
604 :     my @genomes = grep { $_ !~ /^\./ } readdir(GENOMES);
605 :     closedir(GENOMES);
606 :     foreach my $g (@genomes)
607 :     {
608 :     my @seqs;
609 :     if($dna)
610 :     {
611 :     @seqs = grep { $checked{$_->[0]} } &gjoseqlib::read_fasta("$dataDF/PegDNA/$g");
612 :     }
613 :     else
614 :     {
615 :     @seqs = grep { $checked{$_->[0]} } &gjoseqlib::read_fasta("$dataDF/Seqs/$g");
616 :     }
617 :     foreach my $tuple(@seqs)
618 :     {
619 :     my($peg,undef,$seq) = @$tuple;
620 :     print SEQS ">$peg\n$seq\n";
621 :     }
622 :     }
623 :     close(SEQS);
624 :    
625 :     &SeedUtils::run("svr_align_seqs < $tmp_seqs_in | svr_ali_to_html > $tmp_seqs_ali");
626 :     open(SEQS,"<$tmp_seqs_ali") || die "could not open $tmp_seqs_ali";
627 :     print $cgi->header;
628 :     while (defined($_ = <SEQS>))
629 :     {
630 :     $_ =~ s/\b(fig\|\d+\.\d+\.peg\.\d+)/<a target=_blank href=http:\/\/pubseed.theseed.org\/seedviewer.cgi?page=Annotation&feature=$1>$1<\/a>/g;
631 :     print $_;
632 :     }
633 :     close(SEQS);
634 :     unlink($tmp_seqs_in,$tmp_seqs_ali);
635 :     }
636 :     }
637 :    
638 :     sub show_occurs_tree {
639 :     my($cgi,$dataDF) = @_;
640 :    
641 :     my @checked = map { ($_ =~ s/^check.//) ? $_ : () } $cgi->param();
642 :     if (@checked > 3)
643 :     {
644 :     my %genome_name =
645 :     map { ($_ =~ /^(\d+\.\d+)\t(\S.*\S)/) ? ($1 => $2) : () }
646 :     `cat $dataDF/genome.names`;
647 :     my $tmp_labels = "$FIG_Config::temp/tmp$$.labels";
648 :     open(LABELS,">$tmp_labels") || die "could not open $tmp_labels";
649 :     foreach my $peg (@checked)
650 :     {
651 :     if ($peg =~ /^fig\|(\d+\.\d+)/)
652 :     {
653 :     my $g = $genome_name{$1};
654 :     my $link = &peg_link($peg);
655 :     print LABELS "$peg\t$link: $g\n";
656 :     }
657 :     }
658 :     close(LABELS);
659 :     my $tmp_seqs = "$FIG_Config::temp/tmp$$.seqs";
660 :     open(SEQS,"| svr_fasta -fasta -protein | svr_align_seqs | svr_tree | sketch_tree -m -l $tmp_labels > $tmp_seqs")
661 :     || die "could not open $tmp_seqs";
662 :     foreach $_ (@checked)
663 :     {
664 :     print SEQS "$_\n";
665 :     }
666 :     close(SEQS);
667 :     open(SEQS,"<$tmp_seqs") || die "could not open $tmp_seqs";
668 :     print $cgi->header;
669 :     print "<pre>\n";
670 :     while (defined($_ = <SEQS>))
671 :     {
672 :     print $_;
673 :     }
674 :     print "</pre>\n";
675 :     close(SEQS);
676 :     unlink($tmp_seqs,$tmp_labels);
677 :     }
678 :     }
679 :     sub show_options_for_otu {
680 :     my($cgi,$g) = @_;
681 :    
682 :     print $cgi->header;
683 :     print "<h3>$g</h3>\n";
684 :     print "<ol>\n";
685 : overbeek 1.2 print "<li> <a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_otu_tree&dataD=$g>Gains/losses of Gene Families</a>\n";
686 :     print "<li><a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_kovbassa&dataD=$g>Genes that Distinguishing Families (Genes as Signatures)</a>\n";
687 :     print "<li><a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_adjacency&dataD=$g>Changes in Adjacency</a>\n";
688 : overbeek 1.1 print "</ol>\n";
689 :    
690 :     push(@$html,"<br>",
691 : overbeek 1.2 $cgi->start_form(-action => "./families_on_tree.cgi"),
692 : overbeek 1.1 $cgi->textfield(-name => 'keywords',-size => 100),
693 :     $cgi->hidden(-override => 1,-name => 'request',-value => 'show_indexed_funcs'),
694 :     $cgi->hidden(-override => 1,-name => 'dataD',-value => $dataD),
695 :     $cgi->submit('show functions matching keywords'),$cgi->end_form(),
696 :     "<hr>",
697 :     &virulence_functions_link($cgi,$dataDF));
698 :     foreach $_ (@$html) { print $_ }
699 :    
700 :     my $txt = <<END;
701 :     <pre><br><hr>
702 :     We think of changes as belonging to three broad classes:
703 :    
704 :     1. Genes that are gained or lost,
705 :    
706 :     2. Rearrangements, which we often think of as "changes in adjacency", and
707 :    
708 :     3. SNPs
709 :    
710 :     Users concerned about issues relating to virulence will probably focus on the
711 :     first two classes, unless that have high-quality sequences for a number of
712 :     very close genomes (and they can separate these into virulent and nonvirulent).
713 :    
714 :     On the other hand users interested in what makes a production strain display
715 :     desirable properties may well have a number of very, very close strains, and for them
716 :     SNPs might well be the most interesting form of change.
717 :    
718 :     Lurking behind efforts to accurately display these three types of changes all
719 :     depend heavily on our ability to match up genese between genomes. That is,
720 :     there are (almost inevitably) a set of protein families that are the
721 :     basis of comparison, and these are often of dubious quality. Be aware that
722 :     we have attempted to build fairly accurate families of isofunctional homologs
723 :     (i.e., genes that have the same function and a common ancestor), but they are
724 :     far from perfect.
725 :    
726 :     END
727 :     print $txt;
728 :     }
729 :    
730 :     sub show_otus {
731 :     my($cgi,$datadir) = @_;
732 :    
733 :     print $cgi->header;
734 :     if (opendir(GENERA,$dataDir))
735 :     {
736 :     my @genera = grep { $_ !~ /^\./ } readdir(GENERA);
737 :     closedir(GENERA);
738 :     print "<h1>What Changed?</h1>\n";
739 :     print "<h2><a target=_blank href=\"http://bioseed.mcs.anl.gov/~overbeek/what_changed.html\">Getting Started: a short Tutorial</a></h2>\n";
740 :     print "<h2>Genera Available</h2>\n";
741 :     foreach my $g (sort @genera)
742 :     {
743 : overbeek 1.2 print "<h3><a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_options_for_otu&dataD=$g>$g</a>\n";
744 : overbeek 1.1 }
745 :     }
746 :     else
747 :     {
748 :     print "<h1>The dataD parameter is invalid\n";
749 :     }
750 :     }
751 :    
752 :     sub peg_link {
753 :     my($peg) = @_;
754 :    
755 :     return "<a target=_blank href=http://pubseed.theseed.org/seedviewer.cgi?page=Annotation&feature=$peg>$peg</a>";
756 :     }
757 :    
758 :     sub compare_link {
759 :     my($pegs) = @_;
760 :     my @genomes = map { ($_ =~ /^fig\|(\d+\.\d+)/) ? $1 : () } @$pegs;
761 :     my $args = join("&",map { "show_genome=$_" } @genomes);
762 :     return "<a target=_blank href=http://pubseed.theseed.org/seedviewer.cgi?page=Annotation&feature=" .
763 :     $pegs->[0] . "&$args>Compare Regions</a>";
764 :     }
765 :    
766 :     sub show_func_link {
767 :     my($dataD,$function) = @_;
768 :    
769 :     if (! $function) { return '' }
770 :     my $functionQ = uri_escape($function);
771 : overbeek 1.2 return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_func&function=$functionQ&dataD=$dataD>$function</a>"
772 : overbeek 1.1 }
773 :    
774 :     sub virulence_functions_link {
775 :     my($cgi,$dataDF) = @_;
776 :    
777 :     if ((-s "$dataDF/virulence.functions") && ($dataDF =~ /([^\/]+)$/))
778 :     {
779 :     my $dataDQ = uri_escape($1);
780 : overbeek 1.2 return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_virulence_functions&dataD=$dataDQ>Some Posssible Functions Associated with Virulence</a>";
781 : overbeek 1.1 }
782 :     return '';
783 :     }
784 :    
785 :     sub show_fam_on_tree_link {
786 :     my($dataDF,$fam,$union) = @_;
787 :    
788 :     $union = defined($union) ? $union : 0;
789 :     if ($dataDF =~ /([^\/]+)$/)
790 :     {
791 :     my $dataDQ = uri_escape($1);
792 : overbeek 1.2 return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?family=$fam&union=$union&dataD=$dataDQ&request=show_family_tree>Family on Tree</a>";
793 : overbeek 1.1 }
794 :     return '';
795 :     }
796 :    
797 :     sub show_node_link {
798 :     my($dataD,$node,$type) = @_;
799 :     if ($type eq "families")
800 :     {
801 : overbeek 1.2 return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_node&dataD=$dataD&node=$node&type=families>$node</a>";
802 : overbeek 1.1 }
803 :     else
804 :     {
805 : overbeek 1.2 return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_node&dataD=$dataD&node=$node&type=adjacency>$node</a>";
806 : overbeek 1.1 }
807 :     }
808 :    
809 :     sub show_clusters_link {
810 :     my($dataD,$family,$coupled) = @_;
811 :     my %tmp = map { $_ => 1 } ($family,@$coupled);
812 :     my $families = join(",",sort { $a <=> $b} keys(%tmp));
813 : overbeek 1.2 return "<a target=_blank href=\"http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_clusters&dataD=$dataD&families=$families\">$family</a>";
814 : overbeek 1.1 }
815 :    
816 :     sub show_clusters {
817 :     my($cgi,$dataDF,$html) = @_;
818 :    
819 :     my $families = $cgi->param('families');
820 :     my @families = split(/,/,$families);
821 :     my %families = map { $_ => 1 } @families;
822 :     my %genome_names = map { ($_ =~ /^(\S+)\t(\S.*\S)/) ? ($1 => $2) : () } `cat $dataDF/genome.names`;
823 :     my @genome_pegN_fam_func = sort { ($a->[0] <=> $b->[0]) or ($a->[1] <=> $b->[1]) }
824 :     map { (($_ =~ /^(\S+)\t([^\t]*)\t[^\t]*\tfig\|(\d+\.\d+)\.peg\.(\d+)/) && $families{$1}) ?
825 :     [$3,$4,$1,$2] : ()
826 :     } `cat $dataDF/families.all`;
827 :     push(@$html,$cgi->h1('Relevant Clusters'));
828 :     my $col_hdrs = ['Family','Function','PEG'];
829 :     my $last = shift @genome_pegN_fam_func;
830 :     while ($last)
831 :     {
832 :     my $last_g = $last->[0];
833 :     my $last_pegN = $last->[1];
834 :     my @set;
835 :     while ($last && ($last_g == $last->[0]) && &close($last_pegN,$last->[1]))
836 :     {
837 :     $last_pegN = $last->[1];
838 :     push(@set,[$last->[2],$last->[3],&peg_link("fig|" . $last_g . ".peg." . $last_pegN)]);
839 :     $last = shift @genome_pegN_fam_func;
840 :     }
841 :     if (@set > 1)
842 :     {
843 :     push(@$html,&HTML::make_table($col_hdrs,\@set,"Cluster for $last_g: $genome_names{$last_g}"));
844 :     push(@$html,"<hr><br><br>\n");
845 :     }
846 :     }
847 :     }
848 :    
849 :     sub close {
850 :     my($pegN1,$pegN2) = @_;
851 :    
852 :     return abs($pegN2 - $pegN1) <= 7;
853 :     }
854 :    
855 :     sub show_kovbassa_signatures {
856 :     my($cgi,$dataDF,$html) = @_;
857 :    
858 :     $dataDF =~ /([^\/]+)$/;
859 :     my $dataD = $1;
860 :     my @tree;
861 :     my @tmp = `cat $dataDF/readable.tree`;
862 :     foreach $_ (@tmp)
863 :     {
864 :     if ($_ =~ /^(.*\+ )(n\d+)$/)
865 :     {
866 :     my $start = $1;
867 :     my $node = $2;
868 :     my $link = $cgi->radio_group(-nolabels => 1,
869 :     -name => "radio_$node",
870 :     -override => 1,
871 :     -values => [1,0,2],
872 :     -default => 0);
873 :     push(@tree,"$start$node$link\n");
874 :     }
875 :     elsif ($_ =~ /^(.*\- )(\d+\.\d+)(:.*)$/)
876 :     {
877 :     my $start = $1;
878 :     my $node = $2;
879 :     my $end = $3;
880 :     my $link = $cgi->radio_group(-nolabels => 1,
881 :     -name => "radio_$node",
882 :     -override => 1,
883 :     -values => [1,0,2],
884 :     -default => 0);
885 :     push(@tree,"$start",$link,"$end\n");
886 :     }
887 :     else
888 :     {
889 :     push(@tree,$_);
890 :     }
891 :     }
892 :     my $tree = join("",@tree);
893 :     push(@$html,"<br>",
894 : overbeek 1.2 $cgi->start_form(-action => "./families_on_tree.cgi"),
895 : overbeek 1.1 $cgi->hidden(-override => 1,-name => 'request',-value => 'compute_kovbassa_sigs'),
896 :     $cgi->hidden(-override => 1,-name => 'dataD',-value => $dataD),
897 :     "<pre>\n$tree\n</pre>\n",
898 :     $cgi->submit('Compute Gene Signatures'));
899 :     }
900 :    
901 :     sub compute_kovbassa_signatures {
902 :     my($cgi,$dataDF,$html) = @_;
903 :    
904 :     $dataDF =~ /([^\/]+)$/;
905 :     my $dataD = $1;
906 :     my @out;
907 :     my @in;
908 :     my @params = grep { $_ =~ /^radio/ } $cgi->param();
909 :     foreach my $param (@params)
910 :     {
911 :     if ($param =~ /^radio_(\S+)/)
912 :     {
913 :     my $node = $1;
914 :     if ($cgi->param($param) == 1)
915 :     {
916 :     push(@out,$node);
917 :     }
918 :     elsif ($cgi->param($param) == 2)
919 :     {
920 :     push(@in,$node);
921 :     }
922 :     }
923 :     }
924 :     my $families = "$dataDF/families.all";
925 :     my $tree = &parse_newick_tree(join("",`cat $dataDF/labeled.tree`));
926 :     my $indexP = &tree_utilities::tree_index_tables($tree);
927 :     my $genomes = {};
928 :     &in_set(\@out,$tree,$indexP,1,$genomes);
929 :     &in_set(\@in,$tree,$indexP,2,$genomes);
930 :     my $s1N = grep { $genomes->{$_} == 1 } keys(%$genomes);
931 :     my $s2N = grep { $genomes->{$_} == 2 } keys(%$genomes);
932 :    
933 :     my %by_family;
934 :     my %fam2func;
935 :     foreach $_ (map { (($_ =~ /^(\S+)\t([^\t]*)\tfig\|(\d+\.\d+)/) && $genomes->{$3}) ? [$1,$2,$3] : () }
936 :     `cut -f1,2,4 $families`)
937 :     {
938 :     my($fam,$func,$g) = @$_;
939 :     $by_family{$fam}->{$g} = 1;
940 :     $fam2func{$fam} = $func;
941 :     }
942 :    
943 :     my @scored_families;
944 :     my %famH;
945 :     foreach my $f (keys(%by_family))
946 :     {
947 :     my $fH = $by_family{$f};
948 :     my @hits = keys(%$fH);
949 :     my $s1Hits = grep { $genomes->{$_} == 1 } @hits;
950 :     my $s2Hits = grep { $genomes->{$_} == 2 } @hits;
951 :     my $counts1 = [$s1Hits,$s1N-$s1Hits];
952 :     my $counts2 = [$s2Hits,$s2N-$s2Hits];
953 :     my($din,$dout) = &discriminates($counts2,$counts1);
954 :     my $disc = $din+$dout;
955 :     if ($disc > 1.5)
956 :     {
957 :     $famH{$f} = 1;
958 :     push(@scored_families,[sprintf("%0.3f",$disc),$f]);
959 :     }
960 :     }
961 :     my $coupledH = &coupling_data($dataDF,\%famH);
962 :     my $col_hdrs = ['Score','Tree','Family','Mean Length','Function','Clusters','Coupling'];
963 :     my %mean_len = map { ($_ =~ /^(\S+)\t(\S+)/) ? ($1 => $2) : () } `cut -f1,6 $dataDF/families.all`;
964 :     my $rows = [];
965 :     foreach my $tuple (sort { $b->[0] <=> $a->[0] } @scored_families)
966 :     {
967 :     my($sc,$fam) = @$tuple;
968 :     my $func = $fam2func{$fam};
969 :     my $func_link = &show_func_link($dataD,$func);
970 :     my($cluster_link,$coupled_html) = &cluster_link_and_cluster_html($fam,$coupledH,\%fam2func,$dataD);
971 :     push(@$rows,[$sc,
972 :     &show_fam_links($dataDF,$fam),
973 :     $mean_len{$fam},
974 :     $func_link,
975 :     $cluster_link,
976 :     $coupled_html
977 :     ]);
978 :     }
979 :     push(@$html,&HTML::make_table($col_hdrs,$rows,"Families that Distinguish"));
980 :     }
981 :    
982 :     sub in_set {
983 :     my($nodes,$tree,$indexP,$which,$genomes) = @_;
984 :    
985 :     foreach my $x (@$nodes)
986 :     {
987 :     if ($x =~ /^(\d+\.\d+)/)
988 :     {
989 :     $genomes->{$1} = $which;
990 :     }
991 :     elsif (($x =~ /^(n\d+)/) && (my $node = &label_to_node($indexP,$1)))
992 :     {
993 :     my $tips = &tips_of_tree($node);
994 :     foreach my $tip (@$tips)
995 :     {
996 :     $genomes->{$tip} = $which;
997 :     }
998 :     }
999 :     }
1000 :     }
1001 :    
1002 :     sub discriminates {
1003 :     my($xin,$xout) = @_;
1004 :    
1005 :     my $sx = &vector_sum($xin);
1006 :     my $sy = &vector_sum($xout);
1007 :     my $xy = &scalar_product($xin,$xout);
1008 :     my $xx = &scalar_product($xin,$xin);
1009 :     my $yy = &scalar_product($xout,$xout);
1010 :    
1011 :     my $din = (($sx != 0) && ($yy != 0)) ? (1 - (($sy * $xy) / ($sx * $yy))) : 0;
1012 :     my $dout = (($sy != 0) && ($xx != 0)) ? (1 - (($sx * $xy) / ($sy * $xx))) : 0;
1013 :     return ($din,$dout);
1014 :     }
1015 :    
1016 :     sub vector_sum {
1017 :     my($v) = @_;
1018 :    
1019 :     my $sum = 0;
1020 :     foreach $_ (@$v)
1021 :     {
1022 :     $sum += $_;
1023 :     }
1024 :     return $sum;
1025 :     }
1026 :    
1027 :     sub scalar_product {
1028 :     my($x,$y) = @_;
1029 :    
1030 :     if (@$x != @$y)
1031 :     {
1032 :     print STDERR &Dumper($x,$y);
1033 :     die "incompatable";
1034 :     }
1035 :     my $i;
1036 :     my $sum = 0;
1037 :     for ($i=0; ($i < @$x); $i++)
1038 :     {
1039 :     $sum += $x->[$i] * $y->[$i];
1040 :     }
1041 :     return $sum;
1042 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3