[Bio] / FigWebServices / sigs.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/sigs.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.22 - (view) (download)

1 : overbeek 1.15 ##################################################################
2 : olson 1.14 # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 : overbeek 1.15 ##################################################################
18 : olson 1.14
19 : efrank 1.1 use FIG;
20 : olson 1.13 require SproutFIG;
21 : efrank 1.1
22 :     use HTML;
23 :     use CGI;
24 : parrello 1.20 use Tracer;
25 : efrank 1.1 my $cgi = new CGI;
26 :    
27 : overbeek 1.8 if (0) {
28 :     my $VAR1;
29 :     eval(join("",`cat /tmp/sig_parms`));
30 :     $cgi = $VAR1;
31 :     # print STDERR &Dumper($cgi);
32 :     }
33 :    
34 :     if (0) {
35 : efrank 1.1 print $cgi->header;
36 :     my @params = $cgi->param;
37 :     print "<pre>\n";
38 : overbeek 1.8 foreach $_ (@params) {
39 :     print "$_\t:",join(",",$cgi->param($_)),":\n";
40 :     }
41 :    
42 :     if (0) {
43 :     if (open(TMP,">/tmp/sig_parms")) {
44 :     print TMP &Dumper($cgi);
45 :     close(TMP);
46 :     }
47 : efrank 1.1 }
48 :     exit;
49 :     }
50 :    
51 :     my $html = [];
52 :    
53 : overbeek 1.9 my($fig_or_sprout);
54 :     if ($cgi->param('SPROUT')) {
55 :     $is_sprout = 1;
56 :     $fig_or_sprout = new SproutFIG($FIG_Config::sproutDB, $FIG_Config::sproutData);
57 : parrello 1.21 unshift @$html, "<TITLE>The NMPDR Signature Genes Page v2</TITLE>\n";
58 : overbeek 1.9 } else {
59 :     $is_sprout = 0;
60 :     $fig_or_sprout = new FIG;
61 :     unshift @$html, "<TITLE>The SEED Signature Genes Page</TITLE>\n";
62 :     }
63 : parrello 1.20 my $tracing = $cgi->param('TRACE');
64 :     if ($tracing) {
65 :     TSetup($tracing, "QUEUE");
66 :     }
67 : overbeek 1.9
68 : efrank 1.1 my @tmp = grep { $_ =~ /^\d+\.\d+$/ } $cgi->param;
69 :     my @set1 = grep { $cgi->param($_) eq "set1" } @tmp;
70 :     my @set2 = grep { $cgi->param($_) eq "set2" } @tmp;
71 :    
72 :     my $given = $cgi->param('given');
73 :    
74 :     if (((@set1 == 0) && (@set2 == 0)) || (! $given))
75 :     {
76 :    
77 :     $col_hdrs = ["Given","Set 1","","Set 2","genome","Genus/Species"];
78 :     $tab = [];
79 :    
80 : overbeek 1.9 @orgs = $fig_or_sprout->genomes("complete");
81 : efrank 1.1
82 :     foreach $org (@orgs)
83 :     {
84 : overbeek 1.9 $full{$org} = $fig_or_sprout->genus_species($org);
85 : efrank 1.1 }
86 :     @orgs = sort { $full{$a} cmp $full{$b} } @orgs;
87 :    
88 :     @given = $cgi->radio_group(-name => 'given',
89 :     -values => [@orgs],
90 :     -nolabels => 1);
91 :    
92 :     foreach $org (@orgs)
93 :     {
94 :     push(@$tab,[shift @given,
95 :     $cgi->radio_group(-name => $org,
96 :     -default => 'neither',
97 :     -values => ['set1','neither','set2'],
98 :     -nolabels => 1
99 :     ),
100 :     $org,
101 :     $full{$org}]);
102 :     }
103 : overbeek 1.10 my $sprout = $cgi->param('SPROUT') ? 1 : 0;
104 : efrank 1.1 push(@$html,$cgi->start_form(-action => 'sigs.cgi'),
105 : overbeek 1.10 $cgi->hidden(-name => 'SPROUT', -value => $sprout),
106 : overbeek 1.6 $cgi->h1("Find Proteins that Discriminate Two Sets of Organisms or Are Common to a Set of Organisms"),
107 : efrank 1.1 &HTML::make_table($col_hdrs,$tab,"Pick organisms for Set 1 and Set 2"),
108 : parrello 1.20 $cgi->br, "Similarity Cutoff: ",$cgi->textfield(-name => "cutoff", -size => 10, -value => 1.0e-10),
109 : overbeek 1.7 $cgi->br,
110 : overbeek 1.15 $cgi->checkbox( -name => 'sort_by_func', -value => 1, -override => 1, -checked => 0, -label => 'Sort by Function'),
111 :     $cgi->br,
112 :     $cgi->checkbox( -name => 'write_tab', -value => 1, -override => 1, -checked => 0, -label => 'Export Tab Delimited Table'),
113 : parrello 1.20 $cgi->br,
114 : parrello 1.21 $cgi->br, "Tracing: ",$cgi->textfield(-name => "TRACE", -size => 30, -value => ""),
115 : overbeek 1.15 $cgi->br,
116 :     $cgi->br,
117 : overbeek 1.11 $cgi->submit("Find the Discriminating Proteins from Given Organism"),$cgi->reset,
118 : overbeek 1.6 $cgi->br,
119 :     $cgi->br,
120 :     $cgi->submit("Find Genes from Checked Organism and in Organisms from Set 1"),
121 :     " (minimum matches from Set 1: )", $cgi->textfield(-name => minN, -size => 5),
122 : efrank 1.1 $cgi->end_form,
123 :     "\n");
124 :     }
125 :     else
126 :     {
127 :     my $sim_cutoff = $cgi->param('sim_cutoff');
128 :     if (! $sim_cutoff) { $sim_cutoff = 1.0e-10 }
129 :    
130 : overbeek 1.6 if (@set1 > 0)
131 :     {
132 :     my @hits;
133 :     if (@set2 > 0)
134 :     {
135 :     @hits = &differentiating_genes(\@set1,\@set2,$given,$sim_cutoff);
136 : parrello 1.20 Trace(scalar(@hits) . " hits found by differentiating_genes.") if T(3);
137 : overbeek 1.15 if ($cgi->param('sort_by_func'))
138 :     {
139 :     @hits = sort { ($a->[2] cmp $b->[2]) or ($b->[1] <=> $a->[1]) or (&FIG::by_fig_id($a->[0],$b->[0])) } @hits;
140 :     }
141 :     else
142 :     {
143 :     @hits = sort { ($b->[1] <=> $a->[1]) || (&FIG::by_fig_id($a->[0],$b->[0])) } @hits;
144 :     }
145 :    
146 :     if ($cgi->param('write_tab'))
147 :     {
148 :     push(@$html,"<pre>\n");
149 :     foreach $_ (@hits)
150 :     {
151 :     push(@$html,join("\t",@$_) . "\n");
152 :     }
153 :     push(@$html,"</pre>\n");
154 :     }
155 :     else
156 : overbeek 1.6 {
157 : overbeek 1.15 $col_hdrs = ["","Gene","Score","Function"];
158 :     $tab = [];
159 :     my $gs = $fig_or_sprout->genus_species($given);
160 :     $title = "Genes in $gs that Discriminate";
161 :     my $subscript = 1;
162 :     foreach $_ (@hits)
163 :     {
164 :     my($peg,$score,$function) = @$_;
165 :     push(@$tab,[$subscript,&HTML::fid_link($cgi,$peg,"local"),$score,$function]);
166 :     $subscript++;
167 :     }
168 :    
169 :     push(@$html,&HTML::make_table($col_hdrs,$tab,$title));
170 : overbeek 1.6 }
171 :     }
172 :     else
173 :     {
174 :     my($i,$j,%which_col,$peg1,$func1,$link,$genome1,$hit);
175 :     my $minN = $cgi->param('minN');
176 :     $minN = $minN ? $minN : @set1;
177 :     @hits = &common_genes(\@set1,$given,$sim_cutoff,$minN);
178 : overbeek 1.9 $col_hdrs = ["",&FIG::abbrev($fig_or_sprout->genus_species($given)),"Score","Function"];
179 : overbeek 1.6 for ($i=0; ($i < @set1); $i++)
180 :     {
181 :     $which_col{$set1[$i]} = $i+4;
182 : overbeek 1.9 push(@$col_hdrs,&FIG::abbrev($fig_or_sprout->genus_species($set1[$i])));
183 : overbeek 1.6 }
184 :    
185 :     $tab = [];
186 :     $title = "Genes in Common";
187 : efrank 1.1
188 : overbeek 1.6 for ($j=0; ($j < @hits); $j++)
189 :     {
190 :     my($peg,$score,$hits) = @{$hits[$j]};
191 : parrello 1.17 my $func = scalar $fig_or_sprout->function_of($peg,$cgi->param('user'));
192 : overbeek 1.6 my $row = [$j+1,&HTML::fid_link($cgi,$peg,"local"),$score,$func];
193 :     for ($i=0; ($i < @set1); $i++)
194 :     {
195 :     push(@$row,"");
196 :     }
197 :     foreach $hit (@$hits)
198 :     {
199 :     ($peg1,$func1) = @$hit;
200 :     $genome1 = &FIG::genome_of($peg1);
201 :     $col = $which_col{$genome1};
202 :     $link = &HTML::fid_link($cgi,$peg1,"local");
203 :     $row->[$col] = ($func eq $func1) ? $link : "*$link";
204 :     }
205 :     push(@$tab,$row);
206 :     }
207 :     push(@$html,&HTML::make_table($col_hdrs,$tab,$title));
208 :     }
209 :     }
210 :     else
211 : efrank 1.1 {
212 : overbeek 1.6 push(@$html,$cgi->h1("You need to fill in at least Set1"));
213 : efrank 1.1 }
214 :     }
215 :     &HTML::show_page($cgi,$html);
216 :    
217 : overbeek 1.6 sub common_genes {
218 :     my($set1,$given,$sim_cutoff,$minN) = @_;
219 : parrello 1.16 warn "sig: common genes.\n";
220 : overbeek 1.6 my %set1 = map { $_ => 1 } @$set1;
221 :     if ($set1{$given})
222 :     {
223 :     $minN--;
224 :     $set1{$given} = 0;
225 :     }
226 :    
227 : overbeek 1.9 foreach $peg ($fig_or_sprout->all_features($given,"peg"))
228 : overbeek 1.6 {
229 :     undef %hits_set1;
230 : parrello 1.19 foreach $sim ($fig_or_sprout->bbhs($peg)) # sims($peg, 1000, $sim_cutoff, "fig"))
231 : overbeek 1.6 {
232 : parrello 1.17 $id2 = $sim->[0]; # id2
233 : overbeek 1.6 if ($id2 =~ /^fig\|(\d+\.\d+)/)
234 :     {
235 :     my $org1 = $1;
236 :     if ($set1{$org1})
237 :     {
238 :     if (! $hits_set1{$org1})
239 :     {
240 :     $hits_set1{$org1} = $id2;
241 :     }
242 :     }
243 :     }
244 :     }
245 :     $sc = keys(%hits_set1);
246 :     if ($sc >= $minN)
247 :     {
248 : parrello 1.17 push(@hits,[$peg,$sc,[map { [$hits_set1{$_}, scalar $fig_or_sprout->function_of($hits_set1{$_})] } keys(%hits_set1)]]);
249 : overbeek 1.6 }
250 :     }
251 :     return @hits;
252 :     }
253 :    
254 : efrank 1.1 sub differentiating_genes {
255 :     my($set1,$set2,$given,$sim_cutoff) = @_;
256 : parrello 1.16 warn "sigs: differentiating.\n";
257 : efrank 1.1 my %set1 = map { $_ => 1 } @$set1;
258 :     my %set2 = map { $_ => 1 } @$set2;
259 :    
260 :     my(%hits_set1,%hits_set2,@hits,$sim,$id2,$peg);
261 :    
262 : overbeek 1.9 foreach $peg ($fig_or_sprout->all_features($given,"peg"))
263 : efrank 1.1 {
264 :     undef %hits_set1; undef %hits_set2;
265 : overbeek 1.4 $hits_set1{&FIG::genome_of($peg)} = 1;
266 :    
267 : parrello 1.19 foreach $sim ($fig_or_sprout->bbhs($peg)) # sims($peg, 1000, $sim_cutoff, "fig"))
268 : efrank 1.1 {
269 : parrello 1.17 $id2 = $sim->[0]; # id2;
270 : efrank 1.1 if ($id2 =~ /^fig\|(\d+\.\d+)/)
271 :     {
272 :     my $org1 = $1;
273 :     if ($set1{$org1})
274 :     {
275 :     $hits_set1{$org1} = 1;
276 :     }
277 :     elsif ($set2{$org1})
278 :     {
279 :     $hits_set2{$org1} = 1;
280 :     }
281 :     }
282 :     }
283 :     my $n_set1 = keys(%hits_set1);
284 :     my $n_set2 = keys(%hits_set2);
285 :    
286 :     # my $sc = sprintf "%.3f", ($n_set1 / @set1) - (($n_set2 / @set2) * $fudge);
287 :     my $sc = sprintf "%.3f", &sig($n_set1,(@set1 - $n_set1),$n_set2,(@set2 - $n_set2));
288 : parrello 1.20
289 : efrank 1.1 if ($sc >= 1)
290 :     {
291 : overbeek 1.15 push(@hits,[$peg,$sc,scalar $fig_or_sprout->function_of($peg)]);
292 : efrank 1.1 }
293 :     }
294 :     return @hits;
295 :     }
296 :    
297 :     sub sig {
298 :     my($in_has,$in_has_not,$out_has,$out_has_not) = @_;
299 :     my($sx,$sy,$xx,$xy,$yy,$din,$dout);
300 : parrello 1.16 warn "sigs: sig method.\n";
301 : efrank 1.1 $sx = $in_has + $in_has_not;
302 :     $sy = $out_has + $out_has_not;
303 :     $xx = ($in_has * $in_has) + ($in_has_not * $in_has_not);
304 :     $xy = ($in_has * $out_has) + ($in_has_not * $out_has_not);
305 :     $yy = ($out_has * $out_has) + ($out_has_not * $out_has_not);
306 :     (($sx > 0) && ($yy > 0) && ($sy > 0) && ($xx > 0)) || return 0;
307 :     $din = 1 - (($sy * $xy) / ($sx * $yy));
308 :     $dout = 1 - (($sx * $xy) / ($sy * $xx));
309 :     return $din+$dout;
310 :     }
311 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3