Parent Directory
|
Revision Log
Revision 1.2 - (view) (download)
1 : | overbeek | 1.1 | ######################################################################## |
2 : | use CGI; | ||
3 : | |||
4 : | |||
5 : | if (-f "$FIG_Config::data/Global/why_down") | ||
6 : | { | ||
7 : | local $/; | ||
8 : | open my $fh, "<$FIG_Config::data/Global/why_down"; | ||
9 : | my $down_msg = <$fh>; | ||
10 : | |||
11 : | print CGI::header(); | ||
12 : | print CGI::head(CGI::title("SEED Server down")); | ||
13 : | print CGI::start_body(); | ||
14 : | print CGI::h1("SEED Server down"); | ||
15 : | print CGI::p("The seed server is not currently running:"); | ||
16 : | print CGI::pre($down_msg); | ||
17 : | print CGI::end_body(); | ||
18 : | exit; | ||
19 : | } | ||
20 : | |||
21 : | if ($FIG_Config::readonly) | ||
22 : | { | ||
23 : | CGI::param("user", undef); | ||
24 : | } | ||
25 : | ######################################################################## | ||
26 : | use CGI; | ||
27 : | |||
28 : | |||
29 : | if (-f "$FIG_Config::data/Global/why_down") | ||
30 : | { | ||
31 : | local $/; | ||
32 : | open my $fh, "<$FIG_Config::data/Global/why_down"; | ||
33 : | my $down_msg = <$fh>; | ||
34 : | |||
35 : | print CGI::header(); | ||
36 : | print CGI::head(CGI::title("SEED Server down")); | ||
37 : | print CGI::start_body(); | ||
38 : | print CGI::h1("SEED Server down"); | ||
39 : | print CGI::p("The seed server is not currently running:"); | ||
40 : | print CGI::pre($down_msg); | ||
41 : | print CGI::end_body(); | ||
42 : | exit; | ||
43 : | } | ||
44 : | |||
45 : | if ($FIG_Config::readonly) | ||
46 : | { | ||
47 : | CGI::param("user", undef); | ||
48 : | } | ||
49 : | ######################################################################## | ||
50 : | # -*- perl -*- | ||
51 : | # | ||
52 : | # Copyright (c) 2003-2006 University of Chicago and Fellowship | ||
53 : | # for Interpretations of Genomes. All Rights Reserved. | ||
54 : | # | ||
55 : | # This file is part of the SEED Toolkit. | ||
56 : | # | ||
57 : | # The SEED Toolkit is free software. You can redistribute | ||
58 : | # it and/or modify it under the terms of the SEED Toolkit | ||
59 : | # Public License. | ||
60 : | # | ||
61 : | # You should have received a copy of the SEED Toolkit Public License | ||
62 : | # along with this program; if not write to the University of Chicago | ||
63 : | # at info@ci.uchicago.edu or the Fellowship for Interpretation of | ||
64 : | # Genomes at veronika@thefig.info or download a copy from | ||
65 : | # http://www.theseed.org/LICENSE.TXT. | ||
66 : | # | ||
67 : | |||
68 : | use DB_File; | ||
69 : | use URI::Escape; # uri_escape | ||
70 : | use gjoseqlib; | ||
71 : | use HTML; | ||
72 : | use strict; | ||
73 : | use CGI; | ||
74 : | my $cgi = new CGI; | ||
75 : | use SeedEnv; | ||
76 : | use tree_utilities; | ||
77 : | |||
78 : | if (0) { | ||
79 : | print $cgi->header; | ||
80 : | my @params = $cgi->param; | ||
81 : | print "<pre>\n"; | ||
82 : | foreach $_ (@params) { | ||
83 : | print "$_\t:",join(",",$cgi->param($_)),":\n"; | ||
84 : | } | ||
85 : | exit; | ||
86 : | } | ||
87 : | |||
88 : | my $html = []; | ||
89 : | |||
90 : | my $node = $cgi->param('node'); | ||
91 : | my $family = $cgi->param('family'); | ||
92 : | my $ali = $cgi->param('alignment'); | ||
93 : | my $tree = $cgi->param('tree'); | ||
94 : | my $request = $cgi->param('request'); | ||
95 : | my $keywords = $cgi->param('keywords'); | ||
96 : | my $function = $cgi->param('function'); | ||
97 : | my $dataD = $cgi->param('dataD'); | ||
98 : | my $dataDir = "/homes/overbeek/Ross/MakeCS/Data/CS"; | ||
99 : | my $dataDF = "$dataDir/$dataD"; | ||
100 : | |||
101 : | if ($request eq "show_otus") | ||
102 : | { | ||
103 : | &show_otus($cgi,$dataDir); | ||
104 : | exit; | ||
105 : | } | ||
106 : | elsif (($request eq "show_options_for_otu") && $dataD) | ||
107 : | { | ||
108 : | &show_options_for_otu($cgi,$dataD); | ||
109 : | exit; | ||
110 : | } | ||
111 : | |||
112 : | if (($request eq "show_virulence_functions") && (-s "$dataDF/virulence.functions")) | ||
113 : | { | ||
114 : | &show_virulence_functions($cgi,$dataDF,$html); | ||
115 : | } | ||
116 : | elsif (($request eq 'show_indexed_funcs') && $keywords) | ||
117 : | { | ||
118 : | &show_indexed_funcs($cgi,$dataDF,$html,$keywords); | ||
119 : | } | ||
120 : | elsif (($request eq "show_func") && $function) | ||
121 : | { | ||
122 : | $function =~ s/^\s+//; | ||
123 : | $function =~ s/\s+$//; | ||
124 : | &show_func($cgi,$dataDF,$html,$function); | ||
125 : | } | ||
126 : | elsif (($request eq "show_ali_or_occurs_tree") && $ali) | ||
127 : | { | ||
128 : | &show_ali($cgi); | ||
129 : | exit; | ||
130 : | } | ||
131 : | elsif (($request eq "show_ali_or_occurs_tree") && $tree) | ||
132 : | { | ||
133 : | &show_occurs_tree($cgi,$dataDF); | ||
134 : | exit; | ||
135 : | } | ||
136 : | elsif (($request eq "show_family_pegs") && $family) | ||
137 : | { | ||
138 : | &show_family1($cgi,$dataDF,$html,$family); | ||
139 : | } | ||
140 : | elsif (($request eq "show_family_tree") && $family) | ||
141 : | { | ||
142 : | &show_family2($cgi,$dataDF,$html,$family); | ||
143 : | } | ||
144 : | elsif (($request eq "show_node") && $node) | ||
145 : | { | ||
146 : | &show_changes($cgi,$dataDF,$html,$node); | ||
147 : | } | ||
148 : | elsif ($request eq "show_otu_tree") | ||
149 : | { | ||
150 : | &show_otu_tree($cgi,$dataDF,$html,'families'); | ||
151 : | } | ||
152 : | elsif ($request eq "show_adjacency") | ||
153 : | { | ||
154 : | &show_otu_tree($cgi,$dataDF,$html,'adjacency'); | ||
155 : | } | ||
156 : | elsif ($request eq "show_clusters") | ||
157 : | { | ||
158 : | &show_clusters($cgi,$dataDF,$html); | ||
159 : | } | ||
160 : | elsif ($request eq "show_kovbassa") | ||
161 : | { | ||
162 : | &show_kovbassa_signatures($cgi,$dataDF,$html); | ||
163 : | } | ||
164 : | elsif ($request eq "compute_kovbassa_sigs") | ||
165 : | { | ||
166 : | &compute_kovbassa_signatures($cgi,$dataDF,$html); | ||
167 : | } | ||
168 : | else | ||
169 : | { | ||
170 : | push(@$html,"<h1>Invalid request</h1>"); | ||
171 : | } | ||
172 : | &HTML::show_page($cgi,$html); | ||
173 : | exit; | ||
174 : | |||
175 : | sub show_otu_tree { | ||
176 : | my($cgi,$dataDF,$html,$type) = @_; | ||
177 : | |||
178 : | $dataDF =~ /([^\/]+)$/; | ||
179 : | my $dataD = $1; | ||
180 : | my @tree; | ||
181 : | my @tmp = `cat $dataDF/readable.tree`; | ||
182 : | foreach $_ (@tmp) | ||
183 : | { | ||
184 : | if ($_ =~ /^(.*\+ )(n\d+)$/) | ||
185 : | { | ||
186 : | my $start = $1; | ||
187 : | my $node = $2; | ||
188 : | my $link = &show_node_link($dataD,$node,$type); | ||
189 : | push(@tree,"$start$link\n"); | ||
190 : | } | ||
191 : | elsif ($_ =~ /^(.*\- )(\d+\.\d+)(:.*)$/) | ||
192 : | { | ||
193 : | my $start = $1; | ||
194 : | my $node = $2; | ||
195 : | my $end = $3; | ||
196 : | push(@tree,"$start",&show_node_link($dataD,$node,$type),"$end\n"); | ||
197 : | } | ||
198 : | else | ||
199 : | { | ||
200 : | push(@tree,$_); | ||
201 : | } | ||
202 : | } | ||
203 : | |||
204 : | my $tree = join("",@tree); | ||
205 : | push(@$html,"<pre>\n$tree\n</pre>\n"); | ||
206 : | } | ||
207 : | |||
208 : | sub show_family1 { | ||
209 : | my($cgi,$dataDF,$html,$family) = @_; | ||
210 : | |||
211 : | $dataDF =~ /([^\/]+)$/; | ||
212 : | my $dataD = $1; | ||
213 : | |||
214 : | my $func; | ||
215 : | foreach $_ (`cut -f1,2 $dataDF/families.all`) | ||
216 : | { | ||
217 : | if (($_ =~ /^(\d+)\t(\S[^\t]*\S)/) && ($1 == $family)) { $func = $2 } | ||
218 : | } | ||
219 : | my %genome_names = map { ($_ =~ /^(\S+)\t(\S.*\S)/) ? ($1 => $2) : () } `cat $dataDF/genome.names`; | ||
220 : | my $col_hdrs = ['','Genome','Genome Name','Peg']; | ||
221 : | my @tuples = map { my $tuple = $_; $tuple->[3] = &peg_link($tuple->[3]); $tuple } | ||
222 : | sort { $a->[1] cmp $b->[1] } | ||
223 : | map { (($_ =~ /^(\d+)\t([^\t]*)\t(fig\|(\d+\.\d+)\.peg\.\d+)/) && ($1 == $family)) ? | ||
224 : | [$cgi->checkbox(-name => "check.$3",-checked => 1,-label => ''), | ||
225 : | $4, | ||
226 : | $genome_names{$4}, | ||
227 : | $3] : () } | ||
228 : | `cut -f1,2,4 $dataDF/families.all`; | ||
229 : | overbeek | 1.2 | push(@$html,$cgi->start_form(-action => "./families_on_tree.cgi")); |
230 : | overbeek | 1.1 | push(@$html,"<br><br>\n",&HTML::make_table($col_hdrs,\@tuples,"Distribution of Family $family: $func"),$cgi->hr,"\n"); |
231 : | push(@$html,"<input type=hidden name=dataD value=$dataD>\n"); | ||
232 : | push(@$html,"<input type=hidden name=request value=show_ali_or_occurs_tree>\n"); | ||
233 : | push(@$html,$cgi->submit('alignment'), | ||
234 : | $cgi->checkbox(-name => "dna",-checked => 0,-label => 'DNA'), | ||
235 : | "<br><br>", | ||
236 : | $cgi->submit('tree'), | ||
237 : | $cgi->end_form()); | ||
238 : | } | ||
239 : | |||
240 : | sub show_family2 { | ||
241 : | my($cgi,$dataDF,$html,$family) = @_; | ||
242 : | # if union is specified, you want to treat all families with the same function | ||
243 : | # as "present" | ||
244 : | my $union = $cgi->param('union'); | ||
245 : | my $func; # used only if union is requested | ||
246 : | my @fam_func_peg_tuples = map { chomp; [split(/\t/,$_)] } `cut -f1,2,4 $dataDF/families.all`; | ||
247 : | if ($union) { | ||
248 : | my @tmp = grep { $_->[0] == $family } @fam_func_peg_tuples; | ||
249 : | $func = $tmp[0]->[1]; | ||
250 : | } | ||
251 : | |||
252 : | $dataDF =~ /([^\/]+)$/; | ||
253 : | my $dataD = $1; | ||
254 : | |||
255 : | push(@$html,"<br><hr>",&show_fam_table_link($dataDF,$family),"<br><br>"); | ||
256 : | my @tuples = grep { $union ? ($_->[1] eq $func) : ($_->[0] == $family) } | ||
257 : | @fam_func_peg_tuples; | ||
258 : | my $func = (@tuples > 0) ? $tuples[0]->[1] : 'hypothetical protein'; | ||
259 : | my %has = map { ($_->[2] =~ /fig\|(\d+\.\d+)/) ? ($1 => 1) : () } @tuples; | ||
260 : | my @tree; | ||
261 : | my %node_vals; | ||
262 : | if (! -s "$dataDF/fams.on.tree.db") | ||
263 : | { | ||
264 : | %node_vals = map { (($_ =~ /^(\S+)\t(n\d+)\t(\S+)/) && ($family eq $1)) ? ($2 => $3) : () } `cat $dataDF/families.on.tree`; | ||
265 : | } | ||
266 : | else | ||
267 : | { | ||
268 : | if ($dataD ne "bacteria") { die "only bacteria have a db"; } | ||
269 : | my $bacteriaD = "/homes/overbeek/Ross/MakeCS/Data/CS/bacteria"; | ||
270 : | my $fams_on_tree_db = "$bacteriaD/fams.on.tree.db"; | ||
271 : | my %fams_on_tree; | ||
272 : | tie %fams_on_tree,"DB_File", $fams_on_tree_db, O_RDWR, 0666, $DB_HASH | ||
273 : | or die "Cannot open file '$fams_on_tree_db': $!\n"; | ||
274 : | my $node_vals = $fams_on_tree{$family}; | ||
275 : | %node_vals = map { $_ =~ s/://g; ($_ =~ /^(\S+),(\S+)/) ? ($1 => $2) : () } split("::",$node_vals); | ||
276 : | } | ||
277 : | |||
278 : | my @tmp = `cat $dataDF/readable.tree`; | ||
279 : | foreach $_ (@tmp) | ||
280 : | { | ||
281 : | if ($_ =~ /^(.*\- )(\d+\.\d+)(:.*)$/) | ||
282 : | { | ||
283 : | my $start = $1; | ||
284 : | my $genome = $2; | ||
285 : | my $end = $3; | ||
286 : | if ($has{$genome}) | ||
287 : | { | ||
288 : | push(@tree,"$start<b>$genome$end</b>\n"); | ||
289 : | } | ||
290 : | else | ||
291 : | { | ||
292 : | push(@tree,"$start$genome$end\n"); | ||
293 : | } | ||
294 : | } | ||
295 : | elsif (($_ =~ /^(.*\+ )(n\d+)$/) && (! $union)) | ||
296 : | { | ||
297 : | my $start = $1; | ||
298 : | my $node = $2; | ||
299 : | my $status = $node_vals{$node}; | ||
300 : | my $to_show = $node . ":$status"; | ||
301 : | push(@tree,"$start$to_show\n"); | ||
302 : | } | ||
303 : | else | ||
304 : | { | ||
305 : | push(@tree,$_); | ||
306 : | } | ||
307 : | } | ||
308 : | my $tree = join("",@tree); | ||
309 : | push(@$html,"<br><h2>$func</h2><br><pre>\n$tree\n</pre>\n"); | ||
310 : | } | ||
311 : | |||
312 : | sub show_changes { | ||
313 : | my($cgi,$dataDF,$html,$node) = @_; | ||
314 : | |||
315 : | my $type = $cgi->param('type'); | ||
316 : | if ($type eq 'families') | ||
317 : | { | ||
318 : | &show_changes_families($cgi,$dataDF,$html,$node); | ||
319 : | } | ||
320 : | else | ||
321 : | { | ||
322 : | &show_changes_adjacency($cgi,$dataDF,$html,$node); | ||
323 : | } | ||
324 : | } | ||
325 : | |||
326 : | sub show_changes_adjacency { | ||
327 : | my($cgi,$dataDF,$html,$node) = @_; | ||
328 : | $dataDF =~ /([^\/]+)$/; | ||
329 : | my $dataD = $1; | ||
330 : | my $col_hdrs = ['Family','Function','Ancestral','New','Compare']; | ||
331 : | my @events = grep { $node eq $_->[1] } | ||
332 : | map { ($_ =~ /(\S+)\t(\S+)\t(\d+):\S+\t(\d+):\S+\t(\d+)/) ? [$1,$2,$3,$4,$5] : () } | ||
333 : | `cat $dataDF/placed.events`; | ||
334 : | my %families = map { ($_->[2] => [$_->[3],$_->[4]])} @events; | ||
335 : | my %pegs_needed = map { (($_->[2] => 1), ($_->[3] => 1),($_->[4] => 1)) } @events; | ||
336 : | my %fam_peg = map { my $x; | ||
337 : | (($_ =~ /^(\d+)\t\S+\t(\d+)\t\S+\t\S+\t(\S+)\t(\S+)/) && | ||
338 : | ($x = $families{$1}) && (($x->[0] eq $2) || ($x->[1] eq $2))) ? ("$1,$2" => $3) : () } | ||
339 : | `cat $dataDF/adjacency.of.unique`; | ||
340 : | my %peg_to_func = map { (($_ =~ /^([^t]+)\t([^\t]*)\t(\S+)/) && $pegs_needed{$1}) ? ($3 => $2) : () } `cut -f1,2,4 $dataDF/families.all`; | ||
341 : | my @rows; | ||
342 : | my $ancestor; | ||
343 : | foreach my $event (@events) | ||
344 : | { | ||
345 : | my($anc,$node,$fam,$fam1,$fam2) = @$event; | ||
346 : | $ancestor = $anc; | ||
347 : | my $peg1 = $fam_peg{"$fam,$fam1"}; | ||
348 : | my $peg2 = $fam_peg{"$fam,$fam2"}; | ||
349 : | my $func = $peg_to_func{$peg1}; | ||
350 : | if ($peg1 && $peg2 && $func) | ||
351 : | { | ||
352 : | push(@rows,[&show_fam_table_link($dataDF,$fam), | ||
353 : | $func, | ||
354 : | &peg_link($peg1), | ||
355 : | &peg_link($peg2), | ||
356 : | &compare_link([$peg1,$peg2])]); | ||
357 : | } | ||
358 : | } | ||
359 : | push(@$html,&HTML::make_table($col_hdrs,\@rows,"Changes in Adjacency from $ancestor")); | ||
360 : | } | ||
361 : | |||
362 : | |||
363 : | sub show_changes_families { | ||
364 : | my($cgi,$dataDF,$html,$node) = @_; | ||
365 : | |||
366 : | $dataDF =~ /([^\/]+)$/; | ||
367 : | my $dataD = $1; | ||
368 : | |||
369 : | my %func = map { ($_ =~ /^(\d+)\t(\S[^\t]*\S)/) ? ($1 => $2) : () } `cut -f1,2 $dataDF/families.all`; | ||
370 : | my $col_hdrs = ['Show Where','Show PEGs','Family','Function','Clusters','Coupling']; | ||
371 : | my @tmp = grep { (($_ =~ /^\S+\t(\S+)/) && ($1 eq $node)) } | ||
372 : | `cat /$dataDF/where.shifts.occurred`; | ||
373 : | my @tabG = sort { ($a->[4] cmp $b->[4]) or ($a->[3] <=> $b->[3]) } | ||
374 : | map { ($_ =~ /^(\S+)\t\S+\t(\S+)\t0\t1/) ? [&show_fam_links($dataDF,$2),$1,$2,$func{$2}] : () } | ||
375 : | @tmp; | ||
376 : | # tabG entries are [linkT,linkP,ancestor,fam,func] | ||
377 : | |||
378 : | # try to pick up the ancestor node from the first entry in @tabG | ||
379 : | # If you cannot get it, try to take it from @tabL | ||
380 : | my $anc = (@tabG > 0) ? $tabG[0]->[-3] : undef; | ||
381 : | foreach $_ (@tabG) { splice(@$_,2,1) } ### get rid of ancestor | ||
382 : | ## tabG entries are [linkT,linkP,fam,func] | ||
383 : | |||
384 : | my @tabL = sort { ($a->[4] cmp $b->[4]) or ($a->[3] <=> $b->[3]) } | ||
385 : | map { ($_ =~ /^(\S+)\t\S+\t(\S+)\t1\t0/) ? [&show_fam_links($dataDF,$2),$1,$2,$func{$2}] : () } | ||
386 : | @tmp; | ||
387 : | if (! $anc) | ||
388 : | { | ||
389 : | $anc = (@tabL > 0) ? $tabL[0]->[-3] : ''; | ||
390 : | } | ||
391 : | foreach $_ (@tabL) { splice(@$_,2,1) } ### get rid of ancestor | ||
392 : | |||
393 : | ## @tabG and @tabL are of the form [link-to-tree,link-to-peg-display,family,function]] | ||
394 : | ## we now add coupling data. | ||
395 : | |||
396 : | my $with_couplingL = &build_table(\@tabL,$dataDF); | ||
397 : | my $with_couplingG = &build_table(\@tabG,$dataDF); | ||
398 : | |||
399 : | push(@$html,&HTML::make_table($col_hdrs,$with_couplingG,"Families Gained from Ancestor $anc"),$cgi->hr,"\n"); | ||
400 : | push(@$html,&HTML::make_table($col_hdrs,$with_couplingL,"Families Lost from Ancestor $anc"),$cgi->hr,"\n"); | ||
401 : | } | ||
402 : | |||
403 : | sub coupling_data { | ||
404 : | my($dataDF,$famsH) = @_; | ||
405 : | |||
406 : | my $coupled = {}; | ||
407 : | foreach $_ (`cat $dataDF/coupled.families`) | ||
408 : | { | ||
409 : | if (($_ =~ /(\S+)\t(\S+)\t(\d+)/) && $famsH->{$1} && $famsH->{$2} && ($1 ne $2)) | ||
410 : | { | ||
411 : | push(@{$coupled->{$1}},$2); | ||
412 : | } | ||
413 : | } | ||
414 : | return $coupled; | ||
415 : | } | ||
416 : | |||
417 : | sub cluster_link_and_cluster_html { | ||
418 : | my($family,$coupledH,$fam_to_func,$dataD) = @_; | ||
419 : | |||
420 : | my $cluster_link = ''; | ||
421 : | my $coupled = $coupledH->{$family}; | ||
422 : | my $coupled_html = ""; | ||
423 : | if ($coupled && (@$coupled > 0)) | ||
424 : | { | ||
425 : | $cluster_link = &show_clusters_link($dataD,$family,$coupled); | ||
426 : | $coupled_html = join("<br>",map { &show_fam_table_link($dataD,$_) . '(' . | ||
427 : | &show_func_link($dataD,$fam_to_func->{$_}) . ')' | ||
428 : | } @$coupled); | ||
429 : | } | ||
430 : | return ($cluster_link,$coupled_html); | ||
431 : | } | ||
432 : | |||
433 : | sub build_table { | ||
434 : | my($tab,$dataDF) = @_; | ||
435 : | $dataDF =~ /([^\/]+)$/; | ||
436 : | my $dataD = $1; | ||
437 : | |||
438 : | my %famH = map { ($_->[-2] => 1) } @$tab; | ||
439 : | my %fam_to_func = map { ($_->[2] => $_->[3]) } @$tab; | ||
440 : | my $coupledH = &coupling_data($dataDF,\%famH); | ||
441 : | my @with_coupling; | ||
442 : | foreach my $tuple (@$tab) | ||
443 : | { | ||
444 : | my($link1,$link2,$family,$function) = @$tuple; | ||
445 : | $tuple->[3] = &show_func_link($dataD,$function); | ||
446 : | my($cluster_link,$coupled_html) = &cluster_link_and_cluster_html($family,$coupledH,\%fam_to_func,$dataD); | ||
447 : | $tuple->[4] = $cluster_link; | ||
448 : | $tuple->[5] = $coupled_html; | ||
449 : | push(@with_coupling,$tuple); | ||
450 : | } | ||
451 : | return \@with_coupling; | ||
452 : | } | ||
453 : | |||
454 : | sub show_fam_links { | ||
455 : | my($dataDF,$fam) = @_; | ||
456 : | |||
457 : | $dataDF =~ /([^\/]+)$/; | ||
458 : | my $dataD = $1; | ||
459 : | |||
460 : | overbeek | 1.2 | return ("<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_family_tree&family=$fam&dataD=$dataD>tree</a>", |
461 : | "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_family_pegs&family=$fam&pegs=1&dataD=$dataD>PEGs:$fam</a>"); | ||
462 : | overbeek | 1.1 | } |
463 : | |||
464 : | sub show_fam_table_link { | ||
465 : | my($dataDF,$fam) = @_; | ||
466 : | |||
467 : | $dataDF =~ /([^\/]+)$/; | ||
468 : | my $dataD = $1; | ||
469 : | |||
470 : | overbeek | 1.2 | return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?family=$fam&pegs=1&request=show_family_pegs&dataD=$dataD>$fam: PEGs</a>"; |
471 : | overbeek | 1.1 | } |
472 : | |||
473 : | sub show_func { | ||
474 : | my($cgi,$dataDF,$html,$function) = @_; | ||
475 : | $dataDF =~ /([^\/]+)$/; | ||
476 : | my $dataD = $1; | ||
477 : | |||
478 : | my %counts; | ||
479 : | my %fams = map { my $x; | ||
480 : | if (($_ =~ /^(\d+)\t(\S[^\t]*\S)/) && ($2 eq $function)) | ||
481 : | { | ||
482 : | $counts{$1}++; | ||
483 : | ($1 => 1); | ||
484 : | } | ||
485 : | else | ||
486 : | { | ||
487 : | () | ||
488 : | } | ||
489 : | } `cat $dataDF/families.all`; | ||
490 : | my $col_hdrs = ['Family','PEGs in Family','Distribution on Tree','Size']; | ||
491 : | my @tab = map { [$_, | ||
492 : | &show_fam_table_link($dataDF,$_), | ||
493 : | &show_fam_on_tree_link($dataDF,$_), | ||
494 : | $counts{$_} | ||
495 : | ] } sort { $a <=> $b } keys(%fams); | ||
496 : | push(@$html,"<h1>$function</h1>", &HTML::make_table($col_hdrs,\@tab,"Families with Function"),"<hr>"); | ||
497 : | |||
498 : | my @fams = keys(%fams); | ||
499 : | if (@fams > 1) | ||
500 : | { | ||
501 : | push(@$html,"<br>",&show_fam_on_tree_link($dataDF,$fams[0],"union")," with union of families<br><br>"); | ||
502 : | } | ||
503 : | push(@$html,"<hr>"); | ||
504 : | |||
505 : | my @gained = map { (($_ =~ /^(\S+)\t(\S+)\t(\d+)\t0\t1/) && $fams{$3}) ? | ||
506 : | [$1,&show_node_link($dataD,$2,'families'),&show_fam_table_link($dataDF,$3)] : () } | ||
507 : | `cat $dataDF/where.shifts.occurred`; | ||
508 : | push(@$html,&HTML::make_table(['Ancestor','Descendant','Family'],\@gained,"Where $function was GAINED")); | ||
509 : | push(@$html,"<hr><br>"); | ||
510 : | my @lost = map { (($_ =~ /^(\S+)\t(\S+)\t(\d+)\t1\t0/) && $fams{$3}) ? | ||
511 : | [$1,&show_node_link($dataD,$2,'families'),&show_fam_table_link($dataDF,$3)] : () } | ||
512 : | `cat $dataDF/where.shifts.occurred`; | ||
513 : | push(@$html,&HTML::make_table(['Ancestor','Descendant','Family'],\@lost,"Where $function was LOST")); | ||
514 : | } | ||
515 : | |||
516 : | sub show_indexed_funcs { | ||
517 : | my($cgi,$dataDF,$html,$keywords) = @_; | ||
518 : | |||
519 : | $dataDF =~ /([^\/]+)$/; | ||
520 : | my $dataD = $1; | ||
521 : | |||
522 : | my $functions_in_fams = &functions_in_at_least_one_family($dataDF); | ||
523 : | # $keywords = "$dataD " . $keywords; ### tell the user to add it,if necessary | ||
524 : | |||
525 : | my %funcs_to_show; | ||
526 : | foreach my $func (`svr_sphinx_indexing -k \'$keywords\' | cut -f1 | svr_function_of | cut -f2`) | ||
527 : | { | ||
528 : | chomp $func; | ||
529 : | if ($functions_in_fams->{$func}) | ||
530 : | { | ||
531 : | $funcs_to_show{$func}++; | ||
532 : | } | ||
533 : | } | ||
534 : | my @funcs = sort { $funcs_to_show{$b} <=> $funcs_to_show{$a} } keys(%funcs_to_show); | ||
535 : | if (@funcs == 0) | ||
536 : | { | ||
537 : | push(@$html,"<h1>Sorry, no functions matched</h1>\n"); | ||
538 : | } | ||
539 : | else | ||
540 : | { | ||
541 : | my @links = map { [&show_func_link($dataD,$_)] } @funcs; | ||
542 : | push(@$html,&HTML::make_table(['Possible Functions'],\@links,"Possible functions - Select to find nodes where shifts occurred")); | ||
543 : | } | ||
544 : | } | ||
545 : | |||
546 : | sub show_virulence_functions { | ||
547 : | my($cgi,$dataDF,$html) = @_; | ||
548 : | |||
549 : | $dataDF =~ /([^\/]+)$/; | ||
550 : | my $dataD = $1; | ||
551 : | |||
552 : | my $functions_in_fams = &functions_in_at_least_one_family($dataDF); | ||
553 : | my @virulence_functions = map { chomp; $functions_in_fams->{$_} ? $_ : () } `cat $dataDF/virulence.functions`; | ||
554 : | my @links = map { [&show_func_link($dataD,$_)] } sort @virulence_functions; | ||
555 : | push(@$html,&HTML::make_table(['Function Sometimes Associated with Virulence'], | ||
556 : | \@links, | ||
557 : | 'Functions Known to Be Associated with Virulence in Some Organisms')); | ||
558 : | } | ||
559 : | sub functions_in_at_least_one_family { | ||
560 : | my($dataDF) = @_; | ||
561 : | |||
562 : | my %functions_in_fams = map { chomp; ($_ => 1) } `cut -f2 $dataDF/families.all`; | ||
563 : | return \%functions_in_fams; | ||
564 : | } | ||
565 : | |||
566 : | sub subsystems_of { | ||
567 : | my($fig,$reaction_to_roles,$reaction) = @_; | ||
568 : | |||
569 : | my $roles_and_pegs = $reaction_to_roles->{$reaction}; | ||
570 : | my @roles = map { my $x = $_; $x =~ s/^[^:]+://; $x } @{$reaction_to_roles->{$reaction}}; | ||
571 : | my $printable_roles = ""; | ||
572 : | if (@roles > 0) | ||
573 : | { | ||
574 : | my %tmp = map { $_ =~ /^([^:]+):(\S.*\S)/; ($2 => $1) } @$roles_and_pegs; | ||
575 : | my @tmp = map { &peg_link($tmp{$_}) . "<br>" . $_ } sort keys(%tmp); | ||
576 : | $printable_roles = join(",",@tmp); | ||
577 : | } | ||
578 : | my %subsys; | ||
579 : | foreach my $role (@roles) | ||
580 : | { | ||
581 : | foreach my $s ($fig->role_to_subsystems($role)) | ||
582 : | { | ||
583 : | $subsys{$s} = 1; | ||
584 : | } | ||
585 : | } | ||
586 : | return join("\t",sort keys(%subsys)) . "<br>" . $printable_roles; | ||
587 : | } | ||
588 : | |||
589 : | sub show_ali { | ||
590 : | my($cgi) = @_; | ||
591 : | my @checked = map { ($_ =~ s/^check.//) ? $_ : () } $cgi->param(); | ||
592 : | if (@checked > 1) | ||
593 : | { | ||
594 : | my %checked = map { $_ => 1 } @checked; | ||
595 : | my $dataD = $cgi->param('dataD'); | ||
596 : | my $dataDir = "/homes/overbeek/Ross/MakeCS/Data/CS"; | ||
597 : | my $dataDF = "$dataDir/$dataD"; | ||
598 : | |||
599 : | my $tmp_seqs_in = "$FIG_Config::temp/tmp$$.seqs.in"; | ||
600 : | my $tmp_seqs_ali = "$FIG_Config::temp/tmp$$.ali"; | ||
601 : | open(SEQS,">$tmp_seqs_in") || die "could not open $tmp_seqs_in"; | ||
602 : | my $dna = $cgi->param('dna'); | ||
603 : | opendir(GENOMES,"$dataDF/Seqs") || die "could not open $dataDF/Seqs"; | ||
604 : | my @genomes = grep { $_ !~ /^\./ } readdir(GENOMES); | ||
605 : | closedir(GENOMES); | ||
606 : | foreach my $g (@genomes) | ||
607 : | { | ||
608 : | my @seqs; | ||
609 : | if($dna) | ||
610 : | { | ||
611 : | @seqs = grep { $checked{$_->[0]} } &gjoseqlib::read_fasta("$dataDF/PegDNA/$g"); | ||
612 : | } | ||
613 : | else | ||
614 : | { | ||
615 : | @seqs = grep { $checked{$_->[0]} } &gjoseqlib::read_fasta("$dataDF/Seqs/$g"); | ||
616 : | } | ||
617 : | foreach my $tuple(@seqs) | ||
618 : | { | ||
619 : | my($peg,undef,$seq) = @$tuple; | ||
620 : | print SEQS ">$peg\n$seq\n"; | ||
621 : | } | ||
622 : | } | ||
623 : | close(SEQS); | ||
624 : | |||
625 : | &SeedUtils::run("svr_align_seqs < $tmp_seqs_in | svr_ali_to_html > $tmp_seqs_ali"); | ||
626 : | open(SEQS,"<$tmp_seqs_ali") || die "could not open $tmp_seqs_ali"; | ||
627 : | print $cgi->header; | ||
628 : | while (defined($_ = <SEQS>)) | ||
629 : | { | ||
630 : | $_ =~ s/\b(fig\|\d+\.\d+\.peg\.\d+)/<a target=_blank href=http:\/\/pubseed.theseed.org\/seedviewer.cgi?page=Annotation&feature=$1>$1<\/a>/g; | ||
631 : | print $_; | ||
632 : | } | ||
633 : | close(SEQS); | ||
634 : | unlink($tmp_seqs_in,$tmp_seqs_ali); | ||
635 : | } | ||
636 : | } | ||
637 : | |||
638 : | sub show_occurs_tree { | ||
639 : | my($cgi,$dataDF) = @_; | ||
640 : | |||
641 : | my @checked = map { ($_ =~ s/^check.//) ? $_ : () } $cgi->param(); | ||
642 : | if (@checked > 3) | ||
643 : | { | ||
644 : | my %genome_name = | ||
645 : | map { ($_ =~ /^(\d+\.\d+)\t(\S.*\S)/) ? ($1 => $2) : () } | ||
646 : | `cat $dataDF/genome.names`; | ||
647 : | my $tmp_labels = "$FIG_Config::temp/tmp$$.labels"; | ||
648 : | open(LABELS,">$tmp_labels") || die "could not open $tmp_labels"; | ||
649 : | foreach my $peg (@checked) | ||
650 : | { | ||
651 : | if ($peg =~ /^fig\|(\d+\.\d+)/) | ||
652 : | { | ||
653 : | my $g = $genome_name{$1}; | ||
654 : | my $link = &peg_link($peg); | ||
655 : | print LABELS "$peg\t$link: $g\n"; | ||
656 : | } | ||
657 : | } | ||
658 : | close(LABELS); | ||
659 : | my $tmp_seqs = "$FIG_Config::temp/tmp$$.seqs"; | ||
660 : | open(SEQS,"| svr_fasta -fasta -protein | svr_align_seqs | svr_tree | sketch_tree -m -l $tmp_labels > $tmp_seqs") | ||
661 : | || die "could not open $tmp_seqs"; | ||
662 : | foreach $_ (@checked) | ||
663 : | { | ||
664 : | print SEQS "$_\n"; | ||
665 : | } | ||
666 : | close(SEQS); | ||
667 : | open(SEQS,"<$tmp_seqs") || die "could not open $tmp_seqs"; | ||
668 : | print $cgi->header; | ||
669 : | print "<pre>\n"; | ||
670 : | while (defined($_ = <SEQS>)) | ||
671 : | { | ||
672 : | print $_; | ||
673 : | } | ||
674 : | print "</pre>\n"; | ||
675 : | close(SEQS); | ||
676 : | unlink($tmp_seqs,$tmp_labels); | ||
677 : | } | ||
678 : | } | ||
679 : | sub show_options_for_otu { | ||
680 : | my($cgi,$g) = @_; | ||
681 : | |||
682 : | print $cgi->header; | ||
683 : | print "<h3>$g</h3>\n"; | ||
684 : | print "<ol>\n"; | ||
685 : | overbeek | 1.2 | print "<li> <a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_otu_tree&dataD=$g>Gains/losses of Gene Families</a>\n"; |
686 : | print "<li><a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_kovbassa&dataD=$g>Genes that Distinguishing Families (Genes as Signatures)</a>\n"; | ||
687 : | print "<li><a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_adjacency&dataD=$g>Changes in Adjacency</a>\n"; | ||
688 : | overbeek | 1.1 | print "</ol>\n"; |
689 : | |||
690 : | push(@$html,"<br>", | ||
691 : | overbeek | 1.2 | $cgi->start_form(-action => "./families_on_tree.cgi"), |
692 : | overbeek | 1.1 | $cgi->textfield(-name => 'keywords',-size => 100), |
693 : | $cgi->hidden(-override => 1,-name => 'request',-value => 'show_indexed_funcs'), | ||
694 : | $cgi->hidden(-override => 1,-name => 'dataD',-value => $dataD), | ||
695 : | $cgi->submit('show functions matching keywords'),$cgi->end_form(), | ||
696 : | "<hr>", | ||
697 : | &virulence_functions_link($cgi,$dataDF)); | ||
698 : | foreach $_ (@$html) { print $_ } | ||
699 : | |||
700 : | my $txt = <<END; | ||
701 : | <pre><br><hr> | ||
702 : | We think of changes as belonging to three broad classes: | ||
703 : | |||
704 : | 1. Genes that are gained or lost, | ||
705 : | |||
706 : | 2. Rearrangements, which we often think of as "changes in adjacency", and | ||
707 : | |||
708 : | 3. SNPs | ||
709 : | |||
710 : | Users concerned about issues relating to virulence will probably focus on the | ||
711 : | first two classes, unless that have high-quality sequences for a number of | ||
712 : | very close genomes (and they can separate these into virulent and nonvirulent). | ||
713 : | |||
714 : | On the other hand users interested in what makes a production strain display | ||
715 : | desirable properties may well have a number of very, very close strains, and for them | ||
716 : | SNPs might well be the most interesting form of change. | ||
717 : | |||
718 : | Lurking behind efforts to accurately display these three types of changes all | ||
719 : | depend heavily on our ability to match up genese between genomes. That is, | ||
720 : | there are (almost inevitably) a set of protein families that are the | ||
721 : | basis of comparison, and these are often of dubious quality. Be aware that | ||
722 : | we have attempted to build fairly accurate families of isofunctional homologs | ||
723 : | (i.e., genes that have the same function and a common ancestor), but they are | ||
724 : | far from perfect. | ||
725 : | |||
726 : | END | ||
727 : | print $txt; | ||
728 : | } | ||
729 : | |||
730 : | sub show_otus { | ||
731 : | my($cgi,$datadir) = @_; | ||
732 : | |||
733 : | print $cgi->header; | ||
734 : | if (opendir(GENERA,$dataDir)) | ||
735 : | { | ||
736 : | my @genera = grep { $_ !~ /^\./ } readdir(GENERA); | ||
737 : | closedir(GENERA); | ||
738 : | print "<h1>What Changed?</h1>\n"; | ||
739 : | print "<h2><a target=_blank href=\"http://bioseed.mcs.anl.gov/~overbeek/what_changed.html\">Getting Started: a short Tutorial</a></h2>\n"; | ||
740 : | print "<h2>Genera Available</h2>\n"; | ||
741 : | foreach my $g (sort @genera) | ||
742 : | { | ||
743 : | overbeek | 1.2 | print "<h3><a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_options_for_otu&dataD=$g>$g</a>\n"; |
744 : | overbeek | 1.1 | } |
745 : | } | ||
746 : | else | ||
747 : | { | ||
748 : | print "<h1>The dataD parameter is invalid\n"; | ||
749 : | } | ||
750 : | } | ||
751 : | |||
752 : | sub peg_link { | ||
753 : | my($peg) = @_; | ||
754 : | |||
755 : | return "<a target=_blank href=http://pubseed.theseed.org/seedviewer.cgi?page=Annotation&feature=$peg>$peg</a>"; | ||
756 : | } | ||
757 : | |||
758 : | sub compare_link { | ||
759 : | my($pegs) = @_; | ||
760 : | my @genomes = map { ($_ =~ /^fig\|(\d+\.\d+)/) ? $1 : () } @$pegs; | ||
761 : | my $args = join("&",map { "show_genome=$_" } @genomes); | ||
762 : | return "<a target=_blank href=http://pubseed.theseed.org/seedviewer.cgi?page=Annotation&feature=" . | ||
763 : | $pegs->[0] . "&$args>Compare Regions</a>"; | ||
764 : | } | ||
765 : | |||
766 : | sub show_func_link { | ||
767 : | my($dataD,$function) = @_; | ||
768 : | |||
769 : | if (! $function) { return '' } | ||
770 : | my $functionQ = uri_escape($function); | ||
771 : | overbeek | 1.2 | return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_func&function=$functionQ&dataD=$dataD>$function</a>" |
772 : | overbeek | 1.1 | } |
773 : | |||
774 : | sub virulence_functions_link { | ||
775 : | my($cgi,$dataDF) = @_; | ||
776 : | |||
777 : | if ((-s "$dataDF/virulence.functions") && ($dataDF =~ /([^\/]+)$/)) | ||
778 : | { | ||
779 : | my $dataDQ = uri_escape($1); | ||
780 : | overbeek | 1.2 | return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_virulence_functions&dataD=$dataDQ>Some Posssible Functions Associated with Virulence</a>"; |
781 : | overbeek | 1.1 | } |
782 : | return ''; | ||
783 : | } | ||
784 : | |||
785 : | sub show_fam_on_tree_link { | ||
786 : | my($dataDF,$fam,$union) = @_; | ||
787 : | |||
788 : | $union = defined($union) ? $union : 0; | ||
789 : | if ($dataDF =~ /([^\/]+)$/) | ||
790 : | { | ||
791 : | my $dataDQ = uri_escape($1); | ||
792 : | overbeek | 1.2 | return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?family=$fam&union=$union&dataD=$dataDQ&request=show_family_tree>Family on Tree</a>"; |
793 : | overbeek | 1.1 | } |
794 : | return ''; | ||
795 : | } | ||
796 : | |||
797 : | sub show_node_link { | ||
798 : | my($dataD,$node,$type) = @_; | ||
799 : | if ($type eq "families") | ||
800 : | { | ||
801 : | overbeek | 1.2 | return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_node&dataD=$dataD&node=$node&type=families>$node</a>"; |
802 : | overbeek | 1.1 | } |
803 : | else | ||
804 : | { | ||
805 : | overbeek | 1.2 | return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_node&dataD=$dataD&node=$node&type=adjacency>$node</a>"; |
806 : | overbeek | 1.1 | } |
807 : | } | ||
808 : | |||
809 : | sub show_clusters_link { | ||
810 : | my($dataD,$family,$coupled) = @_; | ||
811 : | my %tmp = map { $_ => 1 } ($family,@$coupled); | ||
812 : | my $families = join(",",sort { $a <=> $b} keys(%tmp)); | ||
813 : | overbeek | 1.2 | return "<a target=_blank href=\"http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_clusters&dataD=$dataD&families=$families\">$family</a>"; |
814 : | overbeek | 1.1 | } |
815 : | |||
816 : | sub show_clusters { | ||
817 : | my($cgi,$dataDF,$html) = @_; | ||
818 : | |||
819 : | my $families = $cgi->param('families'); | ||
820 : | my @families = split(/,/,$families); | ||
821 : | my %families = map { $_ => 1 } @families; | ||
822 : | my %genome_names = map { ($_ =~ /^(\S+)\t(\S.*\S)/) ? ($1 => $2) : () } `cat $dataDF/genome.names`; | ||
823 : | my @genome_pegN_fam_func = sort { ($a->[0] <=> $b->[0]) or ($a->[1] <=> $b->[1]) } | ||
824 : | map { (($_ =~ /^(\S+)\t([^\t]*)\t[^\t]*\tfig\|(\d+\.\d+)\.peg\.(\d+)/) && $families{$1}) ? | ||
825 : | [$3,$4,$1,$2] : () | ||
826 : | } `cat $dataDF/families.all`; | ||
827 : | push(@$html,$cgi->h1('Relevant Clusters')); | ||
828 : | my $col_hdrs = ['Family','Function','PEG']; | ||
829 : | my $last = shift @genome_pegN_fam_func; | ||
830 : | while ($last) | ||
831 : | { | ||
832 : | my $last_g = $last->[0]; | ||
833 : | my $last_pegN = $last->[1]; | ||
834 : | my @set; | ||
835 : | while ($last && ($last_g == $last->[0]) && &close($last_pegN,$last->[1])) | ||
836 : | { | ||
837 : | $last_pegN = $last->[1]; | ||
838 : | push(@set,[$last->[2],$last->[3],&peg_link("fig|" . $last_g . ".peg." . $last_pegN)]); | ||
839 : | $last = shift @genome_pegN_fam_func; | ||
840 : | } | ||
841 : | if (@set > 1) | ||
842 : | { | ||
843 : | push(@$html,&HTML::make_table($col_hdrs,\@set,"Cluster for $last_g: $genome_names{$last_g}")); | ||
844 : | push(@$html,"<hr><br><br>\n"); | ||
845 : | } | ||
846 : | } | ||
847 : | } | ||
848 : | |||
849 : | sub close { | ||
850 : | my($pegN1,$pegN2) = @_; | ||
851 : | |||
852 : | return abs($pegN2 - $pegN1) <= 7; | ||
853 : | } | ||
854 : | |||
855 : | sub show_kovbassa_signatures { | ||
856 : | my($cgi,$dataDF,$html) = @_; | ||
857 : | |||
858 : | $dataDF =~ /([^\/]+)$/; | ||
859 : | my $dataD = $1; | ||
860 : | my @tree; | ||
861 : | my @tmp = `cat $dataDF/readable.tree`; | ||
862 : | foreach $_ (@tmp) | ||
863 : | { | ||
864 : | if ($_ =~ /^(.*\+ )(n\d+)$/) | ||
865 : | { | ||
866 : | my $start = $1; | ||
867 : | my $node = $2; | ||
868 : | my $link = $cgi->radio_group(-nolabels => 1, | ||
869 : | -name => "radio_$node", | ||
870 : | -override => 1, | ||
871 : | -values => [1,0,2], | ||
872 : | -default => 0); | ||
873 : | push(@tree,"$start$node$link\n"); | ||
874 : | } | ||
875 : | elsif ($_ =~ /^(.*\- )(\d+\.\d+)(:.*)$/) | ||
876 : | { | ||
877 : | my $start = $1; | ||
878 : | my $node = $2; | ||
879 : | my $end = $3; | ||
880 : | my $link = $cgi->radio_group(-nolabels => 1, | ||
881 : | -name => "radio_$node", | ||
882 : | -override => 1, | ||
883 : | -values => [1,0,2], | ||
884 : | -default => 0); | ||
885 : | push(@tree,"$start",$link,"$end\n"); | ||
886 : | } | ||
887 : | else | ||
888 : | { | ||
889 : | push(@tree,$_); | ||
890 : | } | ||
891 : | } | ||
892 : | my $tree = join("",@tree); | ||
893 : | push(@$html,"<br>", | ||
894 : | overbeek | 1.2 | $cgi->start_form(-action => "./families_on_tree.cgi"), |
895 : | overbeek | 1.1 | $cgi->hidden(-override => 1,-name => 'request',-value => 'compute_kovbassa_sigs'), |
896 : | $cgi->hidden(-override => 1,-name => 'dataD',-value => $dataD), | ||
897 : | "<pre>\n$tree\n</pre>\n", | ||
898 : | $cgi->submit('Compute Gene Signatures')); | ||
899 : | } | ||
900 : | |||
901 : | sub compute_kovbassa_signatures { | ||
902 : | my($cgi,$dataDF,$html) = @_; | ||
903 : | |||
904 : | $dataDF =~ /([^\/]+)$/; | ||
905 : | my $dataD = $1; | ||
906 : | my @out; | ||
907 : | my @in; | ||
908 : | my @params = grep { $_ =~ /^radio/ } $cgi->param(); | ||
909 : | foreach my $param (@params) | ||
910 : | { | ||
911 : | if ($param =~ /^radio_(\S+)/) | ||
912 : | { | ||
913 : | my $node = $1; | ||
914 : | if ($cgi->param($param) == 1) | ||
915 : | { | ||
916 : | push(@out,$node); | ||
917 : | } | ||
918 : | elsif ($cgi->param($param) == 2) | ||
919 : | { | ||
920 : | push(@in,$node); | ||
921 : | } | ||
922 : | } | ||
923 : | } | ||
924 : | my $families = "$dataDF/families.all"; | ||
925 : | my $tree = &parse_newick_tree(join("",`cat $dataDF/labeled.tree`)); | ||
926 : | my $indexP = &tree_utilities::tree_index_tables($tree); | ||
927 : | my $genomes = {}; | ||
928 : | &in_set(\@out,$tree,$indexP,1,$genomes); | ||
929 : | &in_set(\@in,$tree,$indexP,2,$genomes); | ||
930 : | my $s1N = grep { $genomes->{$_} == 1 } keys(%$genomes); | ||
931 : | my $s2N = grep { $genomes->{$_} == 2 } keys(%$genomes); | ||
932 : | |||
933 : | my %by_family; | ||
934 : | my %fam2func; | ||
935 : | foreach $_ (map { (($_ =~ /^(\S+)\t([^\t]*)\tfig\|(\d+\.\d+)/) && $genomes->{$3}) ? [$1,$2,$3] : () } | ||
936 : | `cut -f1,2,4 $families`) | ||
937 : | { | ||
938 : | my($fam,$func,$g) = @$_; | ||
939 : | $by_family{$fam}->{$g} = 1; | ||
940 : | $fam2func{$fam} = $func; | ||
941 : | } | ||
942 : | |||
943 : | my @scored_families; | ||
944 : | my %famH; | ||
945 : | foreach my $f (keys(%by_family)) | ||
946 : | { | ||
947 : | my $fH = $by_family{$f}; | ||
948 : | my @hits = keys(%$fH); | ||
949 : | my $s1Hits = grep { $genomes->{$_} == 1 } @hits; | ||
950 : | my $s2Hits = grep { $genomes->{$_} == 2 } @hits; | ||
951 : | my $counts1 = [$s1Hits,$s1N-$s1Hits]; | ||
952 : | my $counts2 = [$s2Hits,$s2N-$s2Hits]; | ||
953 : | my($din,$dout) = &discriminates($counts2,$counts1); | ||
954 : | my $disc = $din+$dout; | ||
955 : | if ($disc > 1.5) | ||
956 : | { | ||
957 : | $famH{$f} = 1; | ||
958 : | push(@scored_families,[sprintf("%0.3f",$disc),$f]); | ||
959 : | } | ||
960 : | } | ||
961 : | my $coupledH = &coupling_data($dataDF,\%famH); | ||
962 : | my $col_hdrs = ['Score','Tree','Family','Mean Length','Function','Clusters','Coupling']; | ||
963 : | my %mean_len = map { ($_ =~ /^(\S+)\t(\S+)/) ? ($1 => $2) : () } `cut -f1,6 $dataDF/families.all`; | ||
964 : | my $rows = []; | ||
965 : | foreach my $tuple (sort { $b->[0] <=> $a->[0] } @scored_families) | ||
966 : | { | ||
967 : | my($sc,$fam) = @$tuple; | ||
968 : | my $func = $fam2func{$fam}; | ||
969 : | my $func_link = &show_func_link($dataD,$func); | ||
970 : | my($cluster_link,$coupled_html) = &cluster_link_and_cluster_html($fam,$coupledH,\%fam2func,$dataD); | ||
971 : | push(@$rows,[$sc, | ||
972 : | &show_fam_links($dataDF,$fam), | ||
973 : | $mean_len{$fam}, | ||
974 : | $func_link, | ||
975 : | $cluster_link, | ||
976 : | $coupled_html | ||
977 : | ]); | ||
978 : | } | ||
979 : | push(@$html,&HTML::make_table($col_hdrs,$rows,"Families that Distinguish")); | ||
980 : | } | ||
981 : | |||
982 : | sub in_set { | ||
983 : | my($nodes,$tree,$indexP,$which,$genomes) = @_; | ||
984 : | |||
985 : | foreach my $x (@$nodes) | ||
986 : | { | ||
987 : | if ($x =~ /^(\d+\.\d+)/) | ||
988 : | { | ||
989 : | $genomes->{$1} = $which; | ||
990 : | } | ||
991 : | elsif (($x =~ /^(n\d+)/) && (my $node = &label_to_node($indexP,$1))) | ||
992 : | { | ||
993 : | my $tips = &tips_of_tree($node); | ||
994 : | foreach my $tip (@$tips) | ||
995 : | { | ||
996 : | $genomes->{$tip} = $which; | ||
997 : | } | ||
998 : | } | ||
999 : | } | ||
1000 : | } | ||
1001 : | |||
1002 : | sub discriminates { | ||
1003 : | my($xin,$xout) = @_; | ||
1004 : | |||
1005 : | my $sx = &vector_sum($xin); | ||
1006 : | my $sy = &vector_sum($xout); | ||
1007 : | my $xy = &scalar_product($xin,$xout); | ||
1008 : | my $xx = &scalar_product($xin,$xin); | ||
1009 : | my $yy = &scalar_product($xout,$xout); | ||
1010 : | |||
1011 : | my $din = (($sx != 0) && ($yy != 0)) ? (1 - (($sy * $xy) / ($sx * $yy))) : 0; | ||
1012 : | my $dout = (($sy != 0) && ($xx != 0)) ? (1 - (($sx * $xy) / ($sy * $xx))) : 0; | ||
1013 : | return ($din,$dout); | ||
1014 : | } | ||
1015 : | |||
1016 : | sub vector_sum { | ||
1017 : | my($v) = @_; | ||
1018 : | |||
1019 : | my $sum = 0; | ||
1020 : | foreach $_ (@$v) | ||
1021 : | { | ||
1022 : | $sum += $_; | ||
1023 : | } | ||
1024 : | return $sum; | ||
1025 : | } | ||
1026 : | |||
1027 : | sub scalar_product { | ||
1028 : | my($x,$y) = @_; | ||
1029 : | |||
1030 : | if (@$x != @$y) | ||
1031 : | { | ||
1032 : | print STDERR &Dumper($x,$y); | ||
1033 : | die "incompatable"; | ||
1034 : | } | ||
1035 : | my $i; | ||
1036 : | my $sum = 0; | ||
1037 : | for ($i=0; ($i < @$x); $i++) | ||
1038 : | { | ||
1039 : | $sum += $x->[$i] * $y->[$i]; | ||
1040 : | } | ||
1041 : | return $sum; | ||
1042 : | } |
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |