[Bio] / FigWebServices / proteinfamilies.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/proteinfamilies.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.11 - (view) (download)

1 : redwards 1.1 # -*- perl -*-
2 :    
3 :     =pod
4 :    
5 :     =head1 proteinfamilies.cgi
6 :    
7 : redwards 1.3 A base web interface for getting information about protein families in and out. Initially we are going to make a 3 (or 4) column table of protein, family and other proteins.
8 : redwards 1.1
9 : redwards 1.7 PLEASE NOTE: Do not attempt to read or understand this code. Please leave now. It is a complete mess because it is very experimental and we are trying stuff out. none of it will work. The exit is this way ---->
10 :    
11 : redwards 1.1 =cut
12 :    
13 :     use strict;
14 :     use FIG;
15 :     use HTML;
16 :     use raelib;
17 :     use CGI;
18 : overbeek 1.11 use CGI::Carp qw(fatalsToBrowser);
19 : redwards 1.1 my $cgi=new CGI;
20 :     use LWP::Simple qw(!head); # see the caveat in perldoc LWP about importing two head methods.
21 :    
22 :     my $fig;
23 :     eval {
24 :     $fig = new FIG;
25 :     };
26 :    
27 :     if ($@ ne "")
28 :     {
29 :     my $err = $@;
30 :    
31 :     my(@html);
32 :    
33 :     push(@html, $cgi->p("Error connecting to SEED database."));
34 :     if ($err =~ /Could not connect to DBI:.*could not connect to server/)
35 :     {
36 :     push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
37 :     }
38 :     else
39 :     {
40 :     push(@html, $cgi->pre($err));
41 :     }
42 :     &HTML::show_page($cgi, \@html, 1);
43 :     exit;
44 :     }
45 :    
46 :     my $html = [];
47 :     my $user = $cgi->param('user');
48 :    
49 :     unshift(@$html, "<TITLE>The SEED - Global Protein Families </TITLE>\n");
50 :    
51 : redwards 1.2 my %proteinbase=(
52 :     "fig" => "/FIG/protein.cgi?user=$user&prot=fig|",
53 :     "cog" => "http://www.ncbi.nlm.nih.gov/COG/old/palox.cgi?",
54 :     "sp" => "http://www.expasy.org/uniprot/",
55 :     "tr" => "http://www.expasy.org/uniprot/",
56 :     "kegg" => "http://www.genome.jp/dbget-bin/www_bget?",
57 :     );
58 :    
59 :    
60 :    
61 :     if ($cgi->param('Show Proteins In Each Family'))
62 : redwards 1.1 {
63 : redwards 1.2 my @needed=grep {$cgi->param($_)} $cgi->param("allfams");
64 :     $cgi->param(-name=>'family', -value=>\@needed);
65 :     &show_family($fig,$cgi,$html);
66 :     }
67 : overbeek 1.11 elsif ($cgi->param('analyse_family')) {
68 :     &analyse_family($fig,$cgi,$html);
69 : redwards 1.7 }
70 : redwards 1.10 elsif ($cgi->param('reverse_analyse_family')) {
71 :     &reverse_analyse_family($fig,$cgi,$html);
72 :     }
73 : redwards 1.2 elsif ($cgi->param('family'))
74 :     {
75 :     &show_family($fig,$cgi,$html);
76 : redwards 1.1 }
77 :     elsif ($cgi->param('prot'))
78 :     {
79 : redwards 1.2 &show_protein($fig,$cgi,$html);
80 : redwards 1.1 }
81 :     else
82 :     {
83 :     &show_initial($fig,$cgi,$html);
84 :     }
85 :    
86 :     &HTML::show_page($cgi,$html,1);
87 :     exit;
88 :    
89 :    
90 :     sub show_initial {
91 :     my ($fig,$cgi,$html)=@_;
92 :     # generate a blank page
93 : redwards 1.2 push @$html,
94 :     "<h2>Protein Families</h2>\n",
95 :     "<p>Please enter a protein ID . You will recieve a list of all the families that protein is in. \n",
96 :     "You can use a FIG ID such as fig|83333.1.peg.3, or an ID from SwissProt, KEGG, NCBI, and others.</p>",
97 :     $cgi->start_form(),
98 :     "Please enter a protein id: ", $cgi->textfield(-name=>"prot", -size=>40), "<br>",
99 :     "<p>Alternately, you can enter a family. Please enter a family name in the format pir|PIRSF001547 or fig|PF002363.</p>",
100 :     "Please enter a family id: ", $cgi->textfield(-name=>"family", -size=>40), "<br>",
101 :     $cgi->submit, $cgi->reset, $cgi->end_form;
102 : redwards 1.1 return $html;
103 :     }
104 :    
105 : redwards 1.7
106 : redwards 1.2 sub show_family {
107 : redwards 1.1 my ($fig,$cgi,$html)=@_;
108 : redwards 1.2 foreach my $fam ($cgi->param('family')) {
109 : redwards 1.4 my @cids=sort {$a <=> $b} $fig->ids_in_family($fam);
110 : redwards 1.2 my $tab=[];
111 : redwards 1.3 my $col_hdrs=['Cluster ID', 'Polypeptides with same amino acid sequence'];
112 : redwards 1.4 foreach my $cid (@cids) {
113 :     my @pegs=$fig->cid_to_prots($cid);
114 : redwards 1.2 foreach my $p (@pegs) {
115 :     foreach my $k (keys %proteinbase) {
116 :     if ($p =~ /^$k/) {$p =~ s/^(.*?)\|//; $p = "<a href='$proteinbase{$k}$p'>$1|$p</a>"}
117 : redwards 1.1 }
118 :     }
119 : redwards 1.4 push @$tab, [$cid, (join ", ", (@pegs))];
120 : redwards 1.1 }
121 : redwards 1.2
122 :     push @$html, "<h2>$fam Family</h2>\n",
123 :     "<p>The family $fam has the function ", $fig->family_function($fam), ", and contains ", $fig->sz_family($fam), " proteins, as shown in the table below.<br>",
124 : redwards 1.3 "Each of the sequences with a given ID have the same amino acid sequence, and hence are the same polypeptide, ",
125 :     "even though they may come from different organisms.</p>",
126 : redwards 1.2 "<p>The links will take you to the respective databases for each of the other protein families.\n</p>",
127 :     $cgi->start_form,
128 :     &HTML::make_table($col_hdrs, $tab, "Proteins in " . $fig->family_function($fam) . " ($fam)"),
129 : redwards 1.4 $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>'family', -value=>"$fam"), $cgi->submit("Compare FIG Functions"),
130 : redwards 1.2 $cgi->end_form;
131 : redwards 1.1 }
132 :     }
133 :    
134 : overbeek 1.11 sub show_protein {
135 : redwards 1.1 my ($fig,$cgi,$html)=@_;
136 : overbeek 1.11 foreach my $peg ($cgi->param('prot')) {
137 :     my @families=$fig->families_for_protein($peg);
138 :     unless (@families)
139 :     {
140 :     push @$html, "<h2 style='color: red'>Sorry, $peg is not in any protein families</h2>";
141 :     return;
142 : redwards 1.1 }
143 :    
144 : redwards 1.2 my $tab=[];
145 : overbeek 1.11 my $self=$cgi->url;
146 :     foreach my $fam (@families) {
147 :     my %idcount;
148 :     my $noprots=scalar(map {$idcount{$_}=1} $fig->ids_in_family($fam));
149 :     #push @$tab, ["<a href='$self?family=$fam'>$fam</a>", $fig->family_function($fam), $fig->sz_family($fam), $cgi->checkbox(-name=>$fam, -label=>'')];
150 :     push @$tab, ["<a href='$self?family=$fam'>$fam</a>", $fig->family_function($fam), $noprots, $cgi->checkbox(-name=>$fam, -label=>'')];
151 : redwards 1.2 }
152 : overbeek 1.11
153 :     my $col_hdrs=['Family ID', 'Family Function', 'Number of IDs in Family', 'Choose Family'];
154 :     push @$html, "<h2>Families for $peg</h2>\n",
155 : redwards 1.2 $cgi->start_form,
156 : overbeek 1.11 "<p>$peg is in the following ", scalar(@families), " families. Please choose one or more families using the checkboxes</p>\n",
157 :     &HTML::make_table($col_hdrs, $tab, "Families for $peg"), "\n",
158 :     $cgi->submit('Show Proteins In Each Family'),
159 :     $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),
160 :     $cgi->submit(-name=>'reverse_analyse_family', -value=>"Show Proteins that are NOT in family"),
161 :     $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>"allfams", -value=>\@families), "\n",
162 :     $cgi->reset, $cgi->end_form;
163 : redwards 1.2 }
164 :     }
165 : redwards 1.10
166 :    
167 :     sub analyse_family {
168 :     my ($fig,$cgi,$html)=@_;
169 :     # here are the questions:
170 :     # 1. Given a column in a spreadsheet:
171 :     # 2. Here are the proteins in that column
172 :     # 3. For each protein, here are the families that they are in. How many families are unique and how many families is every protein in?
173 :     # if we start with a column of 10 proteins, and nine of them are all in the same families and one is not, we want to exclude the one and keep the nine.
174 :     # so we recommend that a protein be removed from a family.
175 :     # 4. For each of the families that are good, which proteins are there in some/most of the families that are not in the column that we are looking at
176 :     # 5. For each of the families that are good, which proteins are only in one of those families and not in any others?
177 :    
178 :     # Note that column == family, But start with fig and then allow a replace ID feature like before.
179 :    
180 : overbeek 1.11 my $focus=$cgi->param('focus') or "fig"; # these are the things that we are interested in
181 : redwards 1.10 foreach my $col (grep {$cgi->param($_)} $cgi->param("allfams"))
182 :     {
183 :     # $col is the column in the spreadsheet. This is really a family, but to visualize and code this I am doing it in a FIG-centric way
184 :     my %proteins_in_col;
185 :     map {$proteins_in_col{$_}=1} $fig->ids_in_family($col);
186 :    
187 :     # @proteins are the proteins in that column, although these are cids and not fids at the moment
188 :     my $familycount;
189 :     foreach my $prot (keys %proteins_in_col) {
190 :     foreach my $fam ($fig->in_family($prot)) {
191 :     $familycount->{$fam}++;
192 :     }
193 :     }
194 :    
195 :     my $count_of;
196 :     my $fams;
197 :     foreach my $f (sort {$familycount->{$b} <=> $familycount->{$a}} keys %$familycount) {
198 :     # It seems that $sz_family is not right
199 : overbeek 1.11 map {$fams->{$f}->{$_}++; $count_of->{$_}->{$f}++}
200 : redwards 1.10 grep {/^$focus/}
201 :     map {$fig->cid_to_prots($_)}
202 :     grep {$proteins_in_col{$_}}
203 :     ($fig->ids_in_family($f));
204 :     }
205 :    
206 :     # create a list of families that we know about
207 : overbeek 1.11 my @fams=grep {!/$col/} sort {scalar(keys %{$fams->{$b}}) <=> scalar(keys %{$fams->{$a}})} keys %$fams;
208 :     unshift @fams, $col;
209 :    
210 :     my $tab=[[["Number of proteins in family", "th colspan=3"], map {scalar(keys %{$fams->{$_}})} @fams]];
211 : redwards 1.10
212 : overbeek 1.11 my @fids;
213 :     if ($cgi->param('sort') eq "genome")
214 :     {
215 :     @fids=sort {$fig->genome_of($a) <=> $fig->genome_of($b)} keys %$count_of;
216 :     }
217 :     else
218 :     {
219 :     @fids=sort {scalar(keys %{$count_of->{$b}}) <=> scalar(keys %{$count_of->{$a}})} keys %$count_of;
220 :     }
221 :    
222 :     my $rowcount;
223 :     foreach my $fid (@fids)
224 :     {
225 :     my @row=(++$rowcount, $fid, scalar(keys %{$count_of->{$fid}}));
226 :    
227 :     foreach my $fam (@fams) {
228 :     $count_of->{$fid}->{$fam} ? push @row, ["Y", "td style='background-color: lightgrey; text-align: center'"] : push @row, " &nbsp ";
229 : redwards 1.10 }
230 :     push @$tab, \@row;
231 :     }
232 : overbeek 1.11
233 :     my @hidden=map {$cgi->hidden(-name=>$_)} grep {$cgi->param($_)} $cgi->param("allfams");
234 :     push @$html,
235 :     $cgi->start_form(), "Limit the display to proteins from ", &choose_focus($cgi),
236 :     $cgi->p("These are proteins that ARE in ", $fig->family_function($col), " ($col) and are in other families that have proteins in this family."),
237 :     $cgi->hidden(-name=>"allfams"), $cgi->hidden("prot"), @hidden, $cgi->hidden(-name=>"user"),
238 :     $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),
239 :     $cgi->submit(-name=>'reverse_analyse_family', -value=>"Show Proteins that are NOT in family"),
240 :     &HTML::make_table(["Count", "Protein ID", "Number of fams protein is in", @fams], $tab,' &nbsp; ');
241 : redwards 1.10 }
242 :     }
243 :    
244 :    
245 :     sub reverse_analyse_family {
246 :     my ($fig,$cgi,$html)=@_;
247 :     # here are the questions:
248 :     # 1. Given a column in a spreadsheet:
249 :     # 2. Here are the proteins in that column
250 :     # 3. For each protein, here are the families that they are in. How many families are unique and how many families is every protein in?
251 :     # if we start with a column of 10 proteins, and nine of them are all in the same families and one is not, we want to exclude the one and keep the nine.
252 :     # so we recommend that a protein be removed from a family.
253 :     # 4. For each of the families that are good, which proteins are there in some/most of the families that are not in the column that we are looking at
254 :     # 5. For each of the families that are good, which proteins are only in one of those families and not in any others?
255 :    
256 :     # Note that column == family, But start with fig and then allow a replace ID feature like before.
257 :    
258 : overbeek 1.11 my $focus=$cgi->param('focus'); # these are the things that we are interested in
259 :     unless ($focus) {$focus="fig"}
260 : redwards 1.10 foreach my $col (grep {$cgi->param($_)} $cgi->param("allfams"))
261 :     {
262 :     # $col is the column in the spreadsheet. This is really a family, but to visualize and code this I am doing it in a FIG-centric way
263 :     my %proteins_in_col;
264 :     map {$proteins_in_col{$_}=1} $fig->ids_in_family($col);
265 :    
266 :     # @proteins are the proteins in that column, although these are cids and not fids at the moment
267 :     my $familycount;
268 :     foreach my $prot (keys %proteins_in_col) {
269 :     foreach my $fam ($fig->in_family($prot)) {
270 :     $familycount->{$fam}++;
271 :     }
272 :     }
273 :    
274 :     my $count_of;
275 :     my $fams;
276 :     foreach my $f (sort {$familycount->{$b} <=> $familycount->{$a}} keys %$familycount) {
277 :     # It seems that $sz_family is not right
278 : overbeek 1.11 map {$fams->{$f}->{$_}++; $count_of->{$_}->{$f}++}
279 : redwards 1.10 grep {/^$focus/}
280 :     map {$fig->cid_to_prots($_)}
281 :     grep {!$proteins_in_col{$_}}
282 :     ($fig->ids_in_family($f));
283 :     }
284 :    
285 :     # create a list of families that we know about
286 : overbeek 1.11 my @fams=grep {!/$col/} sort {scalar(keys %{$fams->{$b}}) <=> scalar(keys %{$fams->{$a}})} keys %$fams;
287 :     unshift @fams, $col;
288 :    
289 :     my $tab=[[["Number of proteins in family", "th colspan=3"], map {scalar(keys %{$fams->{$_}})} @fams]];
290 : redwards 1.10
291 : overbeek 1.11 my @fids;
292 :     if ($cgi->param('sort') eq "genome")
293 :     {
294 :     @fids=sort {$fig->genome_of($a) <=> $fig->genome_of($b)} keys %$count_of;
295 :     }
296 :     else
297 :     {
298 :     @fids=sort {scalar(keys %{$count_of->{$b}}) <=> scalar(keys %{$count_of->{$a}})} keys %$count_of;
299 :     }
300 :    
301 :     my $rowcount;
302 :     foreach my $fid (@fids)
303 :     {
304 :     my @row=(++$rowcount, $fid, scalar(keys %{$count_of->{$fid}}));
305 :    
306 :     foreach my $fam (@fams) {
307 :     $count_of->{$fid}->{$fam} ? push @row, ["Y", "td style='background-color: lightgrey; text-align: center'"] : push @row, " &nbsp ";
308 : redwards 1.10 }
309 :     push @$tab, \@row;
310 :     }
311 : overbeek 1.11
312 :     my @hidden=map {$cgi->hidden(-name=>$_)} grep {$cgi->param($_)} $cgi->param("allfams");
313 :     push @$html,
314 :     $cgi->start_form(), "Limit the display to proteins from ", &choose_focus($cgi),
315 :     $cgi->p("These are proteins that ARE NOT in ", $fig->family_function($col), " ($col) but are in other families that have proteins in this family."),
316 :     $cgi->hidden(-name=>"allfams"), $cgi->hidden("prot"), @hidden, $cgi->hidden(-name=>"user"),
317 :     $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),
318 :     $cgi->submit(-name=>'reverse_analyse_family', -value=>"Show Proteins that are NOT in family"),
319 :     &HTML::make_table(["Count", "Protein ID", "Number of fams protein is in", @fams], $tab,' &nbsp; ');
320 : redwards 1.10 }
321 :     }
322 :    
323 : overbeek 1.11
324 :     sub choose_focus {
325 :     my ($cgi)=@_;
326 :     my %choices=(
327 :     "fig" => "FIGfams",
328 :     "tigr" => "TIGRfams",
329 :     "pfam" => "PFAM",
330 :     "sp" => "SwissProt",
331 :     "kegg" => "KEGG",
332 :     "pir" => "PIR SuperFams",
333 :     "mcl" => "MCL",
334 :     "cog" => "COG",
335 :     );
336 :    
337 :     my $default = $cgi->param("focus"); unless ($default) {$default="fig"}
338 :    
339 :     return $cgi->popup_menu(
340 :     -name => "focus",
341 :     -values => [keys %choices],
342 :     -labels => \%choices,
343 :     -default => $default,
344 :     );
345 :     }
346 : redwards 1.10
347 : redwards 1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3