[Bio] / FigWebServices / sigs.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/sigs.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.52 - (view) (download)

1 : overbeek 1.15 ##################################################################
2 : olson 1.14 # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 : overbeek 1.15 ##################################################################
18 : olson 1.14
19 : overbeek 1.49 require SproutFIG;
20 : efrank 1.1 use FIG;
21 : overbeek 1.49 use FIGV;
22 : efrank 1.1
23 :     use HTML;
24 :     use CGI;
25 : redwards 1.50 use CGI::Carp qw(fatalsToBrowser);
26 : parrello 1.20 use Tracer;
27 : parrello 1.36 use PageBuilder;
28 : efrank 1.1 my $cgi = new CGI;
29 :    
30 : overbeek 1.8 if (0) {
31 :     my $VAR1;
32 :     eval(join("",`cat /tmp/sig_parms`));
33 :     $cgi = $VAR1;
34 :     # print STDERR &Dumper($cgi);
35 :     }
36 :    
37 :     if (0) {
38 : efrank 1.1 print $cgi->header;
39 :     my @params = $cgi->param;
40 :     print "<pre>\n";
41 : overbeek 1.8 foreach $_ (@params) {
42 : parrello 1.36 print "$_\t:",join(",",$cgi->param($_)),":\n";
43 : overbeek 1.8 }
44 :    
45 :     if (0) {
46 :     if (open(TMP,">/tmp/sig_parms")) {
47 : parrello 1.36 print TMP &Dumper($cgi);
48 :     close(TMP);
49 :     }
50 : efrank 1.1 }
51 :     exit;
52 :     }
53 :    
54 : overbeek 1.49 my $org_dir;
55 :     $org_dir = $cgi->param('new_genome');
56 :    
57 : efrank 1.1 my $html = [];
58 :    
59 : overbeek 1.9 my($fig_or_sprout);
60 : parrello 1.52 if (FIGRules::nmpdr_mode($cgi)) {
61 : overbeek 1.9 $is_sprout = 1;
62 :     $fig_or_sprout = new SproutFIG($FIG_Config::sproutDB, $FIG_Config::sproutData);
63 :     } else {
64 :     $is_sprout = 0;
65 : overbeek 1.49 $fig_or_sprout = new FIGV($org_dir);;
66 : overbeek 1.9 unshift @$html, "<TITLE>The SEED Signature Genes Page</TITLE>\n";
67 :     }
68 : parrello 1.45
69 :     # Initialize tracing.
70 : parrello 1.46 ETracing($cgi);
71 : parrello 1.45
72 : overbeek 1.42 my $bbhs = $cgi->param('bbhs');
73 :    
74 : efrank 1.1 my @tmp = grep { $_ =~ /^\d+\.\d+$/ } $cgi->param;
75 :     my @set1 = grep { $cgi->param($_) eq "set1" } @tmp;
76 :     my @set2 = grep { $cgi->param($_) eq "set2" } @tmp;
77 :    
78 :     my $given = $cgi->param('given');
79 :    
80 : parrello 1.41 # Determine the operating mode.
81 :     my $submit1 = $cgi->param("Find Common Proteins");
82 :     my $submit2 = $cgi->param("Find Discriminating Proteins");
83 :     # This will be set to 1 for common proteins and 2 for discriminating proteins.
84 :     my $mode = 0;
85 :    
86 :     if (! $given && ($submit1 || $submit2)) {
87 :     push @$html, $cgi->h3("You must select a given organism.");
88 :     } else {
89 :     # Here the user wantsd to process data. Insure the given organism is in
90 :     # set 1.
91 :     my @found = grep { $_ eq $given } @set1;
92 :     if (! @found) {
93 :     push @set1, $given;
94 :     }
95 :     if ($submit1) {
96 :     $mode = 1;
97 :     if (@set2 > 0) {
98 :     push @$html, $cgi->h3("Organisms from set 2 were ignored.");
99 :     }
100 :     } elsif ($submit2) {
101 :     if (@set2 == 0) {
102 :     push @$html, $cgi->h3("Could not process: set 2 was empty.");
103 :     } else {
104 :     $mode = 2;
105 :     }
106 :     }
107 :     }
108 :    
109 :     if ($mode == 0) {
110 : efrank 1.1
111 :     $col_hdrs = ["Given","Set 1","","Set 2","genome","Genus/Species"];
112 :     $tab = [];
113 : parrello 1.45 Trace("Retrieving genomes.") if T(2);
114 : overbeek 1.9 @orgs = $fig_or_sprout->genomes("complete");
115 : redwards 1.48 $cgi->param('Environmental') ? (@orgs=$fig_or_sprout->genomes(undef, undef, "Environmental Sample")) : 1;
116 : redwards 1.50 if ($cgi->param("limittoorgs"))
117 :     {
118 :     my $sometext=$cgi->param("limittoorgs");
119 :     @orgs = grep {$fig_or_sprout->genus_species($_) =~ /$sometext/i} @orgs;
120 :     }
121 : parrello 1.45 Trace("Processing genomes.") if T(2);
122 : efrank 1.1 foreach $org (@orgs)
123 :     {
124 : parrello 1.36 $full{$org} = $fig_or_sprout->genus_species($org);
125 : efrank 1.1 }
126 : redwards 1.50 @orgs = sort { uc($full{$a}) cmp uc($full{$b}) } @orgs;
127 : efrank 1.1
128 :     @given = $cgi->radio_group(-name => 'given',
129 : parrello 1.36 -values => [@orgs],
130 :     -nolabels => 1);
131 : parrello 1.41 # We're going to create a navigation bar here that gets you automatically
132 :     # to the first organism beginning with a specified letter.
133 : parrello 1.45 Trace("Creating navigation bar.") if T(2);
134 : redwards 1.50 my $bar = $cgi->a({ name => "bar" },"Jump to: ");
135 : parrello 1.41 my $currLetter = "";
136 : efrank 1.1 foreach $org (@orgs)
137 :     {
138 : parrello 1.41 my $name = $full{$org};
139 :     my $backToTop = "";
140 : redwards 1.50 if (uc(substr($name, 0, 1)) ne $currLetter) {
141 : parrello 1.41 # Here we have a navigation target: the first organism whose name begins
142 :     # with a new letter. We put in the name label
143 : redwards 1.50 $currLetter = uc(substr($name, 0, 1));
144 : parrello 1.41 $bar .= $cgi->a({ href => "#$currLetter" }, $currLetter) . " ";
145 :     $name = $cgi->a({ name => $currLetter },"") . $name;
146 :     $backToTop = $cgi->a({ href => "#bar"}, "TOP");
147 :     }
148 : parrello 1.36 push(@$tab,[shift @given,
149 : efrank 1.1 $cgi->radio_group(-name => $org,
150 : parrello 1.36 -default => 'neither',
151 :     -values => ['set1','neither','set2'],
152 :     -nolabels => 1
153 :     ),
154 :     $org,
155 : parrello 1.41 $name, $backToTop]);
156 : efrank 1.1 }
157 : overbeek 1.10 my $sprout = $cgi->param('SPROUT') ? 1 : 0;
158 : parrello 1.41 # Create the navigation bar.
159 : efrank 1.1 push(@$html,$cgi->start_form(-action => 'sigs.cgi'),
160 : overbeek 1.42 $cgi->hidden(-name => 'SPROUT', -value => $sprout));
161 :     if (! $sprout)
162 :     {
163 :     push(@$html,"<br>",$cgi->checkbox( -name => 'bbhs', -value => 1, -override => 1, -label => 'Use BBHs' ),"<br>");
164 :     }
165 : parrello 1.45 Trace("Creating main form.") if T(2);
166 : overbeek 1.42 push(@$html,$cgi->h3("Find Proteins from Given Organism that Discriminate Set 1 from Set 2"),
167 : parrello 1.36 $cgi->br, "Similarity Cutoff: ",$cgi->textfield(-name => "cutoff", -size => 10, -value => 1.0e-10),
168 :     $cgi->br,
169 :     $cgi->checkbox( -name => 'sort_by_func', -value => 1, -override => 1, -checked => 0, -label => 'Sort by Function'),
170 :     $cgi->br,
171 :     $cgi->checkbox( -name => 'write_tab', -value => 1, -override => 1, -checked => 0, -label => 'Export Tab Delimited Table'),
172 :     $cgi->br,
173 : overbeek 1.42 $cgi->br,
174 : parrello 1.41 $cgi->submit("Find Discriminating Proteins"),$cgi->reset,
175 :     $cgi->br,
176 : parrello 1.36 $cgi->br,
177 : parrello 1.41 $cgi->h3("Find Proteins from Given Organism that are Common to Set 1"),
178 : parrello 1.36 $cgi->br,
179 : parrello 1.41 "Minimum matches from Set 1: ", $cgi->textfield(-name => minN, -size => 5),
180 : parrello 1.36 $cgi->br,
181 : parrello 1.41 $cgi->submit("Find Common Proteins"),$cgi->reset,
182 :     $cgi->br, $cgi->br,
183 :     $cgi->h3("Select Given, Set 1, and Set 2"),
184 :     $cgi->p($bar),
185 : redwards 1.50 $cgi->p("Limit table to some organisms with some text: ", $cgi->textfield(-name=>"limittoorgs", -size=>20, -label=>"")),
186 :     $cgi->submit("Refresh Table"), $cgi->reset,
187 : parrello 1.41 &HTML::make_table($col_hdrs,$tab,"Pick organisms for Set 1 and Set 2"),
188 :     $cgi->br);
189 : parrello 1.28 if ($cgi->param('TTYPE')) {
190 :     push @$html,
191 :     $cgi->br,
192 :     $cgi->br, "Tracing: ",$cgi->textfield(-name => "TRACE", -size => 30, -value => ""),
193 :     $cgi->hidden(-name => 'TTYPE', -value => $cgi->param('TTYPE'));
194 :     }
195 :     push @$html,
196 : parrello 1.36 $cgi->end_form,
197 :     "\n";
198 : parrello 1.41 } else {
199 :     # Mode 1 or 2
200 : overbeek 1.51 my $sim_cutoff = $cgi->param('cutoff');
201 : efrank 1.1 if (! $sim_cutoff) { $sim_cutoff = 1.0e-10 }
202 : parrello 1.41 my @hits;
203 :     if ($mode == 2) {
204 : parrello 1.45 Trace("Computing discriminants.") if T(2);
205 : overbeek 1.42 @hits = &differentiating_genes(\@set1,\@set2,$given,$sim_cutoff,$is_sprout,$bbhs);
206 : parrello 1.41 Trace(scalar(@hits) . " hits found by differentiating_genes.") if T(3);
207 :     if ($cgi->param('sort_by_func')) {
208 :     @hits = sort { ($a->[2] cmp $b->[2]) or ($b->[1] <=> $a->[1]) or (&FIG::by_fig_id($a->[0],$b->[0])) } @hits;
209 :     } else {
210 :     @hits = sort { ($b->[1] <=> $a->[1]) || (&FIG::by_fig_id($a->[0],$b->[0])) } @hits;
211 :     }
212 : efrank 1.1
213 : parrello 1.41 if ($cgi->param('write_tab')) {
214 :     push(@$html,"<pre>\n");
215 :     foreach $_ (@hits) {
216 :     push(@$html,join("\t",@$_) . "\n");
217 : parrello 1.36 }
218 : parrello 1.41 push(@$html,"</pre>\n");
219 :     } else {
220 : overbeek 1.42 $col_hdrs = ["","Gene","Score","Function","Subsystems"];
221 : parrello 1.41 $tab = [];
222 :     my $gs = $fig_or_sprout->genus_species($given);
223 : redwards 1.50 $title = "There are ", scalar(@hits), " genes in $gs that discriminate these sets";
224 : parrello 1.41 my $subscript = 1;
225 :     foreach $_ (@hits) {
226 :     my($peg,$score,$function) = @$_;
227 : parrello 1.47 my @sub = grep { $fig_or_sprout->usable_subsystem($_) } $fig_or_sprout->peg_to_subsystems($peg);
228 :     my $sprout_param = ($cgi->param('SPROUT') ? "&SPROUT=1" : "");
229 :     my $user = $cgi->param('user') || "";
230 :     my @linkedSubs = map { $cgi->a({href => "./display_subsys.cgi?ssa_name=$_$sprout_param&request=show_ssa&user=$user&focus=$peg&show_clusters=1"},
231 :     $_) } @sub;
232 :     push(@$tab,[$subscript,&HTML::fid_link($cgi,$peg,"local"),$score,$function,join("<br>",@linkedSubs)]);
233 : parrello 1.41 $subscript++;
234 : parrello 1.36 }
235 : parrello 1.41 push(@$html,&HTML::make_table($col_hdrs,$tab,$title));
236 :     }
237 :     } else {
238 :     # Mode 1
239 :     my($i,$j,%which_col,$peg1,$func1,$link,$genome1,$hit);
240 :     my $minN = $cgi->param('minN');
241 :     $minN = $minN ? $minN : @set1;
242 : overbeek 1.42 @hits = &common_genes(\@set1,$given,$sim_cutoff,$minN,$is_sprout,$bbhs);
243 : parrello 1.41 $col_hdrs = ["",&FIG::abbrev($fig_or_sprout->genus_species($given)),"Score","Function"];
244 :     for ($i=0; ($i < @set1); $i++) {
245 :     $which_col{$set1[$i]} = $i+4;
246 :     push(@$col_hdrs,&FIG::abbrev($fig_or_sprout->genus_species($set1[$i])));
247 :     }
248 :    
249 :     $tab = [];
250 :     $title = "Genes in Common";
251 : parrello 1.36
252 : parrello 1.41 for ($j=0; ($j < @hits); $j++) {
253 :     my($peg,$score,$hits) = @{$hits[$j]};
254 :     my $func = scalar $fig_or_sprout->function_of($peg,$cgi->param('user'));
255 :     my $row = [$j+1,&HTML::fid_link($cgi,$peg,"local"),$score,$func];
256 :     for ($i=0; ($i < @set1); $i++) {
257 :     push(@$row,"");
258 : parrello 1.36 }
259 : parrello 1.41 foreach $hit (@$hits) {
260 :     ($peg1,$func1) = @$hit;
261 :     $genome1 = &FIG::genome_of($peg1);
262 :     $col = $which_col{$genome1};
263 :     $link = &HTML::fid_link($cgi,$peg1,"local");
264 :     $row->[$col] = ($func eq $func1) ? $link : "*$link";
265 : parrello 1.36 }
266 : parrello 1.41 push(@$tab,$row);
267 : parrello 1.36 }
268 : parrello 1.41 push(@$html,&HTML::make_table($col_hdrs,$tab,$title));
269 : efrank 1.1 }
270 :     }
271 : parrello 1.35 if ($is_sprout) {
272 :     # For sprout, we use a template.
273 : parrello 1.39 my $template = "$FIG_Config::template_url/Sigs_tmpl.php";
274 : parrello 1.45 my $traceList = QTrace("html");
275 : parrello 1.40 my $result = PageBuilder::Build($template, { data => join("\n", @$html),
276 :     tracing => $traceList }, "Html");
277 : parrello 1.38 print "CONTENT-TYPE: text/html\n\n";
278 : parrello 1.39 print $result;
279 : parrello 1.35 } else {
280 :     # For SEED, a normal SEED page.
281 : parrello 1.45 push @$html, QTrace("html");
282 : parrello 1.35 &HTML::show_page($cgi,$html);
283 :     }
284 : efrank 1.1
285 : overbeek 1.6 sub common_genes {
286 : overbeek 1.42 my($set1,$given,$sim_cutoff,$minN,$is_sprout,$bbhs) = @_;
287 : overbeek 1.6 my %set1 = map { $_ => 1 } @$set1;
288 :     if ($set1{$given})
289 :     {
290 : parrello 1.36 $minN--;
291 :     $set1{$given} = 0;
292 : overbeek 1.6 }
293 :    
294 : overbeek 1.9 foreach $peg ($fig_or_sprout->all_features($given,"peg"))
295 : overbeek 1.6 {
296 : parrello 1.36 undef %hits_set1;
297 : overbeek 1.42 foreach $sim (($is_sprout || $bbhs) ? $fig_or_sprout->bbhs($peg, $sim_cutoff) : $fig_or_sprout->sims($peg, 1000, $sim_cutoff, "fig"))
298 : parrello 1.36 {
299 : overbeek 1.42 $id2 = ($is_sprout || $bbhs) ? $sim->[0] : $sim->[1]; # id2
300 : parrello 1.36 if ($id2 =~ /^fig\|(\d+\.\d+)/)
301 :     {
302 :     my $org1 = $1;
303 :     if ($set1{$org1})
304 :     {
305 :     if (! $hits_set1{$org1})
306 :     {
307 :     $hits_set1{$org1} = $id2;
308 :     }
309 :     }
310 :     }
311 :     }
312 :     $sc = keys(%hits_set1);
313 :     if ($sc >= $minN)
314 :     {
315 :     push(@hits,[$peg,$sc,[map { [$hits_set1{$_}, scalar $fig_or_sprout->function_of($hits_set1{$_})] } keys(%hits_set1)]]);
316 :     }
317 : overbeek 1.6 }
318 :     return @hits;
319 :     }
320 : parrello 1.36
321 : efrank 1.1 sub differentiating_genes {
322 : overbeek 1.42 my($set1,$set2,$given,$sim_cutoff,$is_sprout,$bbhs) = @_;
323 : efrank 1.1 my %set1 = map { $_ => 1 } @$set1;
324 :     my %set2 = map { $_ => 1 } @$set2;
325 :    
326 :     my(%hits_set1,%hits_set2,@hits,$sim,$id2,$peg);
327 : parrello 1.40 my $pegCount = 0;
328 :     my $simCount = 0;
329 : overbeek 1.9 foreach $peg ($fig_or_sprout->all_features($given,"peg"))
330 : efrank 1.1 {
331 : parrello 1.40 $pegCount++;
332 : parrello 1.36 undef %hits_set1; undef %hits_set2;
333 :     $hits_set1{&FIG::genome_of($peg)} = 1;
334 : parrello 1.40 Trace("Processing $peg.") if T(4);
335 : overbeek 1.42 foreach $sim (($is_sprout || $bbhs) ? $fig_or_sprout->bbhs($peg, $sim_cutoff) : $fig_or_sprout->sims($peg, 1000, $sim_cutoff, "fig"))
336 : parrello 1.36 {
337 : parrello 1.40 $simCount++;
338 : overbeek 1.42 $id2 = ($is_sprout || $bbhs) ? $sim->[0] : $sim->[1];
339 : parrello 1.23 Trace("SIG tool sim check for $peg vs. $id2.") if T(4);
340 : parrello 1.36 if ($id2 =~ /^fig\|(\d+\.\d+)/)
341 :     {
342 :     my $org1 = $1;
343 :     if ($set1{$org1})
344 :     {
345 :     $hits_set1{$org1} = 1;
346 :     }
347 :     elsif ($set2{$org1})
348 :     {
349 :     $hits_set2{$org1} = 1;
350 :     }
351 :     }
352 :     }
353 :     my $n_set1 = keys(%hits_set1);
354 :     my $n_set2 = keys(%hits_set2);
355 :    
356 :     # my $sc = sprintf "%.3f", ($n_set1 / @set1) - (($n_set2 / @set2) * $fudge);
357 :     my $sc = sprintf "%.3f", &sig($n_set1,(@set1 - $n_set1),$n_set2,(@set2 - $n_set2));
358 :     if ($sc >= 1)
359 :     {
360 :     push(@hits,[$peg,$sc,scalar $fig_or_sprout->function_of($peg)]);
361 :     }
362 : efrank 1.1 }
363 : parrello 1.40 Trace(scalar(@hits) . " hits found by differentiator off of $simCount sims for $pegCount pegs with cutoff $sim_cutoff.") if T(3);
364 : efrank 1.1 return @hits;
365 :     }
366 :    
367 :     sub sig {
368 :     my($in_has,$in_has_not,$out_has,$out_has_not) = @_;
369 :     my($sx,$sy,$xx,$xy,$yy,$din,$dout);
370 :     $sx = $in_has + $in_has_not;
371 :     $sy = $out_has + $out_has_not;
372 :     $xx = ($in_has * $in_has) + ($in_has_not * $in_has_not);
373 :     $xy = ($in_has * $out_has) + ($in_has_not * $out_has_not);
374 :     $yy = ($out_has * $out_has) + ($out_has_not * $out_has_not);
375 :     (($sx > 0) && ($yy > 0) && ($sy > 0) && ($xx > 0)) || return 0;
376 :     $din = 1 - (($sy * $xy) / ($sx * $yy));
377 :     $dout = 1 - (($sx * $xy) / ($sy * $xx));
378 :     return $din+$dout;
379 :     }
380 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3