[Bio] / FigWebServices / proteinfamilies.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/proteinfamilies.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (view) (download)

1 : redwards 1.1 # -*- perl -*-
2 :    
3 :     =pod
4 :    
5 :     =head1 proteinfamilies.cgi
6 :    
7 : redwards 1.3 A base web interface for getting information about protein families in and out. Initially we are going to make a 3 (or 4) column table of protein, family and other proteins.
8 : redwards 1.1
9 :     =cut
10 :    
11 :     use strict;
12 :     use FIG;
13 :     use HTML;
14 :     use raelib;
15 :     use CGI;
16 :     my $cgi=new CGI;
17 :     use LWP::Simple qw(!head); # see the caveat in perldoc LWP about importing two head methods.
18 :    
19 :     my $fig;
20 :     eval {
21 :     $fig = new FIG;
22 :     };
23 :    
24 :     if ($@ ne "")
25 :     {
26 :     my $err = $@;
27 :    
28 :     my(@html);
29 :    
30 :     push(@html, $cgi->p("Error connecting to SEED database."));
31 :     if ($err =~ /Could not connect to DBI:.*could not connect to server/)
32 :     {
33 :     push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
34 :     }
35 :     else
36 :     {
37 :     push(@html, $cgi->pre($err));
38 :     }
39 :     &HTML::show_page($cgi, \@html, 1);
40 :     exit;
41 :     }
42 :    
43 :     my $html = [];
44 :     my $user = $cgi->param('user');
45 :    
46 :     unshift(@$html, "<TITLE>The SEED - Global Protein Families </TITLE>\n");
47 :    
48 : redwards 1.2 my %proteinbase=(
49 :     "fig" => "/FIG/protein.cgi?user=$user&prot=fig|",
50 :     "cog" => "http://www.ncbi.nlm.nih.gov/COG/old/palox.cgi?",
51 :     "sp" => "http://www.expasy.org/uniprot/",
52 :     "tr" => "http://www.expasy.org/uniprot/",
53 :     "kegg" => "http://www.genome.jp/dbget-bin/www_bget?",
54 :     );
55 :    
56 :    
57 :    
58 :     if ($cgi->param('Show Proteins In Each Family'))
59 : redwards 1.1 {
60 : redwards 1.2 my @needed=grep {$cgi->param($_)} $cgi->param("allfams");
61 :     $cgi->param(-name=>'family', -value=>\@needed);
62 :     &show_family($fig,$cgi,$html);
63 :     }
64 :     elsif ($cgi->param("Combine Families With And"))
65 :     {
66 :     &combine_families($fig,$cgi,$html);
67 :     }
68 : redwards 1.5 elsif ($cgi->param('proteins_between_two'))
69 :     {
70 :     &proteins_between_two($fig, $cgi, $html);
71 :     }
72 : redwards 1.2 elsif ($cgi->param('Compare FIG Functions'))
73 :     {
74 :     my @needed=grep {$cgi->param($_)} $cgi->param("allfams");
75 :     $cgi->param(-name=>'family', -value=>\@needed);
76 :     &compare_functions($fig,$cgi,$html);
77 :     }
78 : redwards 1.4 elsif ($cgi->param('Suggest')) {
79 :     &suggest_proteins($fig,$cgi,$html);
80 :     }
81 : redwards 1.2 elsif ($cgi->param('family'))
82 :     {
83 :     &show_family($fig,$cgi,$html);
84 : redwards 1.1 }
85 :     elsif ($cgi->param('prot'))
86 :     {
87 : redwards 1.2 &show_protein($fig,$cgi,$html);
88 : redwards 1.1 }
89 :     else
90 :     {
91 :     &show_initial($fig,$cgi,$html);
92 :     }
93 :    
94 :     &HTML::show_page($cgi,$html,1);
95 :     exit;
96 :    
97 :    
98 :     sub show_initial {
99 :     my ($fig,$cgi,$html)=@_;
100 :     # generate a blank page
101 : redwards 1.2 push @$html,
102 :     "<h2>Protein Families</h2>\n",
103 :     "<p>Please enter a protein ID . You will recieve a list of all the families that protein is in. \n",
104 :     "You can use a FIG ID such as fig|83333.1.peg.3, or an ID from SwissProt, KEGG, NCBI, and others.</p>",
105 :     $cgi->start_form(),
106 :     "Please enter a protein id: ", $cgi->textfield(-name=>"prot", -size=>40), "<br>",
107 :     "<p>Alternately, you can enter a family. Please enter a family name in the format pir|PIRSF001547 or fig|PF002363.</p>",
108 :     "Please enter a family id: ", $cgi->textfield(-name=>"family", -size=>40), "<br>",
109 :     $cgi->submit, $cgi->reset, $cgi->end_form;
110 : redwards 1.1 return $html;
111 :     }
112 :    
113 : redwards 1.2 sub show_protein {
114 : redwards 1.1 my ($fig,$cgi,$html)=@_;
115 : redwards 1.2 foreach my $peg ($cgi->param('prot')) {
116 :     my @families=$fig->families_for_protein($peg);
117 :     unless (@families)
118 :     {
119 :     push @$html, "<h2 style='color: red'>Sorry, $peg is not in any protein families</h2>";
120 :     return;
121 :     }
122 :    
123 :     my $tab=[];
124 :     my $self=$cgi->url;
125 :     foreach my $fam (@families) {
126 :     push @$tab, ["<a href='$self?family=$fam'>$fam</a>", $fig->family_function($fam), $fig->sz_family($fam), $cgi->checkbox(-name=>$fam, -label=>'')];
127 : redwards 1.1 }
128 : redwards 1.2
129 :     my $col_hdrs=['Family ID', 'Family Function', 'Number of Unique Proteins in Family', 'Choose Family'];
130 :     push @$html, "<h2>Families for $peg</h2>\n",
131 :     $cgi->start_form,
132 :     "<p>$peg is in the following ", scalar(@families), " families. Please choose one or more families using the checkboxes</p>\n",
133 :     &HTML::make_table($col_hdrs, $tab, "Families for $peg"), "\n",
134 : redwards 1.6 $cgi->submit('Show Proteins In Each Family'), $cgi->submit(-name=>"Suggest", -value=>"Suggest Additional families for this protein"),
135 :     $cgi->submit(-name=>'proteins_between_two', -value=>"Compare two or more families for missing"), "<br>\n",
136 : redwards 1.4 $cgi->submit("Combine Families With And"), $cgi->submit("Compare FIG Functions"),
137 :     $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>"allfams", -value=>\@families), "\n",
138 : redwards 1.2 $cgi->reset, $cgi->end_form;
139 : redwards 1.1 }
140 :     }
141 : redwards 1.2
142 :     sub show_family {
143 : redwards 1.1 my ($fig,$cgi,$html)=@_;
144 : redwards 1.2 foreach my $fam ($cgi->param('family')) {
145 : redwards 1.4 my @cids=sort {$a <=> $b} $fig->ids_in_family($fam);
146 : redwards 1.2 my $tab=[];
147 : redwards 1.3 my $col_hdrs=['Cluster ID', 'Polypeptides with same amino acid sequence'];
148 : redwards 1.4 foreach my $cid (@cids) {
149 :     my @pegs=$fig->cid_to_prots($cid);
150 : redwards 1.2 foreach my $p (@pegs) {
151 :     foreach my $k (keys %proteinbase) {
152 :     if ($p =~ /^$k/) {$p =~ s/^(.*?)\|//; $p = "<a href='$proteinbase{$k}$p'>$1|$p</a>"}
153 : redwards 1.1 }
154 :     }
155 : redwards 1.4 push @$tab, [$cid, (join ", ", (@pegs))];
156 : redwards 1.1 }
157 : redwards 1.2
158 :     push @$html, "<h2>$fam Family</h2>\n",
159 :     "<p>The family $fam has the function ", $fig->family_function($fam), ", and contains ", $fig->sz_family($fam), " proteins, as shown in the table below.<br>",
160 : redwards 1.3 "Each of the sequences with a given ID have the same amino acid sequence, and hence are the same polypeptide, ",
161 :     "even though they may come from different organisms.</p>",
162 : redwards 1.2 "<p>The links will take you to the respective databases for each of the other protein families.\n</p>",
163 :     $cgi->start_form,
164 :     &HTML::make_table($col_hdrs, $tab, "Proteins in " . $fig->family_function($fam) . " ($fam)"),
165 : redwards 1.4 $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>'family', -value=>"$fam"), $cgi->submit("Compare FIG Functions"),
166 : redwards 1.2 $cgi->end_form;
167 : redwards 1.1 }
168 :     }
169 :    
170 : redwards 1.2 sub combine_families {
171 : redwards 1.1 my ($fig,$cgi,$html)=@_;
172 : redwards 1.4 # first find all the families and all the cids in those families
173 :     my $cids; my @families; my $allprots;
174 : redwards 1.2 foreach my $f (grep {$cgi->param($_)} $cgi->param("allfams"))
175 :     {
176 :     push @families, $f;
177 : redwards 1.4 foreach my $e ($fig->ids_in_family($f)) {$cids->{$e}->{$f}=1; $allprots++}
178 : redwards 1.1 }
179 : redwards 1.3
180 : redwards 1.4 # now figure out those cids that are in all families
181 : redwards 1.2 # we are going to do this with a boolean switch rather than just counting occurences
182 :     my @wanted;
183 : redwards 1.4 foreach my $cid (keys %$cids) {
184 : redwards 1.2 my $keep=1;
185 : redwards 1.4 foreach my $f (@families) {undef $keep unless ($cids->{$cid}->{$f})}
186 :     push @wanted, $cid if ($keep);
187 : redwards 1.2 }
188 :    
189 :     my $tab=[];
190 : redwards 1.3 my $col_hdrs=['Cluster ID', 'Polypeptides with same amino acid sequence'];
191 : redwards 1.4 foreach my $cid (sort {$a <=> $b} @wanted) {
192 :     my @pegs=$fig->cid_to_prots($cid);
193 : redwards 1.2 foreach my $p (@pegs) {
194 :     foreach my $k (keys %proteinbase) {
195 :     if ($p =~ /^$k/) {$p =~ s/^(.*?)\|//; $p = "<a href='$proteinbase{$k}$p'>$1|$p</a>"}
196 :     }
197 : redwards 1.1 }
198 : redwards 1.4 push @$tab, [$cid, (join ", ", (@pegs))];
199 : redwards 1.1 }
200 : redwards 1.2
201 :    
202 :     push @$html, "<h2>Proteins in ", (join ", ", (@families)), " families</h2>\n",
203 : redwards 1.3 "<h3>Summary</h3>\n<p>There were $allprots proteins in the ", scalar(@families), " families that you selected.\n<br>",
204 : redwards 1.4 "Out of a those proteins, there were ", scalar(keys %$cids),
205 : redwards 1.3 " unique proteins, and ", scalar(@wanted), " proteins are present in the ",
206 :     scalar(@families), " families.</p>\n",
207 :     "Each of the sequences with a given ID have the same amino acid sequence, and hence are the same polypeptide, even though they may come from different organisms.</p>",
208 : redwards 1.2 "<p>The links will take you to the respective databases for each of the other protein families.\n</p>",
209 :     $cgi->start_form,
210 :     &HTML::make_table($col_hdrs, $tab, "Proteins in " . (join ", ", (@families)) . " families"),
211 :     $cgi->end_form;
212 : redwards 1.1 }
213 :    
214 : redwards 1.2 sub compare_functions {
215 :     my ($fig,$cgi,$html)=@_;
216 :     foreach my $fam ($cgi->param('family')) {
217 : redwards 1.4 my @cids=sort {$a <=> $b} $fig->ids_in_family($fam);
218 : redwards 1.2 my $tab=[];
219 :     my $col_hdrs=['FIG ID', 'Genome', 'Assigned Function'];
220 : redwards 1.4 foreach my $cid (@cids) {
221 :     foreach my $peg (grep {/^fig/} $fig->cid_to_prots($cid)) {
222 : redwards 1.2 my $p=$peg;
223 :     $p =~ s/fig\|//;
224 :     $p = "<a href='$proteinbase{'fig'}$p'>$peg</a>";
225 :     push @$tab, [$p, ($fig->genus_species($fig->genome_of($peg))), scalar($fig->function_of($peg))];
226 :     }
227 :     }
228 :    
229 :     push @$html, "<h2>Functions in $fam Family</h2>\n",
230 :     "<p>The family $fam has the function ", $fig->family_function($fam), ".\n",
231 :     "The functions shown here are the functions of just the proteins in the SEED database from this family</p>",
232 :     $cgi->start_form,
233 :     &HTML::make_table($col_hdrs, $tab, "Proteins in " . $fig->family_function($fam) . " ($fam)"),
234 : redwards 1.4 $cgi->hidden(-name=>'prot'), $cgi->hidden(-name=>'family', -value=>"$fam"), $cgi->submit("Compare FIG Functions"),
235 : redwards 1.2 $cgi->end_form;
236 :     }
237 :     }
238 : redwards 1.1
239 : redwards 1.4 sub suggest_proteins {
240 :     my ($fig,$cgi,$html)=@_;
241 :     my $prot=$cgi->param('prot');
242 :     my @families=$fig->families_for_protein($prot);
243 :     my $knownfamily;
244 :     map {$knownfamily->{$_}=1} @families;
245 :    
246 :     print STDERR "Looking through ", scalar(@families), " families\n";
247 :     my $famcount=1;my $time=time;
248 :     my $content;
249 : redwards 1.5 my $row;
250 : redwards 1.4 foreach my $fam (@families) {
251 :     print STDERR "$famcount in ", time-$time, "\n"; $time=time; $famcount++;
252 :    
253 : redwards 1.5 push @$row, [$fam, 'td style="background-color: grey'];
254 :    
255 : redwards 1.4 my @newprots=$fig->proteins_in_family($fam);
256 : redwards 1.5 push @$row, scalar(@newprots);
257 :    
258 : redwards 1.4 print STDERR "$fam has ", scalar(@newprots), "\n";
259 :     foreach my $p (@newprots) {
260 :     my @newfamilies=$fig->families_for_protein($p);
261 :     foreach my $nf (@newfamilies) {
262 :     next if ($knownfamily->{$nf});
263 :     push @{$content->{$nf}}, $p;
264 :     }
265 :     }
266 :     }
267 :    
268 :     my $tab=[];
269 :     @$tab=map {[$_, $fig->family_function($_), scalar(@{$content->{$_}})]} keys %$content;
270 : redwards 1.5 my $col_hdrs=["Family ID", "Family function", "Number of proteins in family"];
271 : redwards 1.4
272 :     push @$html, $cgi->p("$prot is in families ", join ",", @families),
273 :     $cgi->p("These are the other families that $prot should also appear in:"),
274 :     &HTML::make_table($col_hdrs, $tab, "Other Families"), "\n";
275 :     }
276 : redwards 1.5
277 :    
278 :     sub proteins_between_two {
279 :     my ($fig,$cgi,$html)=@_;
280 :     # first find all the families and all the cids in those families
281 :     my $cids; my @families; my $allprots;
282 :     foreach my $f (grep {$cgi->param($_)} $cgi->param("allfams"))
283 :     {
284 :     push @families, $f;
285 :     foreach my $e ($fig->ids_in_family($f)) {$cids->{$e}->{$f}=1; $allprots++}
286 :     }
287 :    
288 :     # now figure out those cids that are in all families
289 :     # we are going to do this with a boolean switch rather than just counting occurences
290 :     my @wanted; my @missing;
291 :     foreach my $cid (keys %$cids) {
292 :     my $keep=1;
293 :     foreach my $f (@families) {
294 :     unless ($cids->{$cid}->{$f}) {
295 :     undef $keep;
296 :     push @missing, $cid;
297 :     }
298 :     }
299 :     push @wanted, $cid if ($keep);
300 :     }
301 :    
302 :     # make some HTML header
303 :     push @$html, "<h2>Comparison of different protein families</h2><ul>\n";
304 :     foreach my $f (sort @families) {push @$html, "<li>", $fig->family_function($f), " ($f) </li>\n"}
305 :     push @$html, "</ul><p>These are the proteins that are unique to one or other of these families:</p>";
306 :    
307 :     foreach my $f (sort @families) {
308 :    
309 :     my $tab=[];
310 :     foreach my $p (sort {$a <=> $b} @missing) {
311 :     next unless ($cids->{$p}->{$f});
312 :     push @$tab, [$p, (join ", ", $fig->cid_to_prots($p))];
313 :     }
314 :    
315 :     my $list;
316 :     foreach my $x (@families) {next if ($f eq $x); $list .= "<li>". $fig->family_function($x). " ($x)</li>\n"}
317 :     push @$html, $cgi->p, "<div style='font-weight: bolder'>Proteins that are in <br><center style='font-weight: bigger'>",
318 :     $fig->family_function($f), " ($f)</center><br>\nthat are not in <ul>$list</ul>\n</div>\n",
319 :     &HTML::make_table(["Cluster ID", "Other Proteins"], $tab, " &nbsp; ");
320 :     }
321 :     }
322 : redwards 1.4

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3