[Bio] / FigWebServices / proteinfamilies.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/proteinfamilies.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12 - (view) (download)

1 : redwards 1.1 # -*- perl -*-
2 :    
3 :     =pod
4 :    
5 :     =head1 proteinfamilies.cgi
6 :    
7 : redwards 1.3 A base web interface for getting information about protein families in and out. Initially we are going to make a 3 (or 4) column table of protein, family and other proteins.
8 : redwards 1.1
9 : redwards 1.7 PLEASE NOTE: Do not attempt to read or understand this code. Please leave now. It is a complete mess because it is very experimental and we are trying stuff out. none of it will work. The exit is this way ---->
10 :    
11 : redwards 1.1 =cut
12 :    
13 :     use strict;
14 :     use FIG;
15 :     use HTML;
16 :     use raelib;
17 :     use CGI;
18 : overbeek 1.11 use CGI::Carp qw(fatalsToBrowser);
19 : redwards 1.1 my $cgi=new CGI;
20 :     use LWP::Simple qw(!head); # see the caveat in perldoc LWP about importing two head methods.
21 :    
22 :     my $fig;
23 :     eval {
24 :     $fig = new FIG;
25 :     };
26 :    
27 :     if ($@ ne "")
28 :     {
29 :     my $err = $@;
30 :    
31 :     my(@html);
32 :    
33 :     push(@html, $cgi->p("Error connecting to SEED database."));
34 :     if ($err =~ /Could not connect to DBI:.*could not connect to server/)
35 :     {
36 :     push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
37 :     }
38 :     else
39 :     {
40 :     push(@html, $cgi->pre($err));
41 :     }
42 :     &HTML::show_page($cgi, \@html, 1);
43 :     exit;
44 :     }
45 :    
46 :     my $html = [];
47 :     my $user = $cgi->param('user');
48 :    
49 :     unshift(@$html, "<TITLE>The SEED - Global Protein Families </TITLE>\n");
50 :    
51 : redwards 1.2 my %proteinbase=(
52 :     "fig" => "/FIG/protein.cgi?user=$user&prot=fig|",
53 :     "cog" => "http://www.ncbi.nlm.nih.gov/COG/old/palox.cgi?",
54 :     "sp" => "http://www.expasy.org/uniprot/",
55 :     "tr" => "http://www.expasy.org/uniprot/",
56 :     "kegg" => "http://www.genome.jp/dbget-bin/www_bget?",
57 :     );
58 :    
59 :    
60 :    
61 :     if ($cgi->param('Show Proteins In Each Family'))
62 : redwards 1.1 {
63 : redwards 1.2 my @needed=grep {$cgi->param($_)} $cgi->param("allfams");
64 :     $cgi->param(-name=>'family', -value=>\@needed);
65 :     &show_family($fig,$cgi,$html);
66 :     }
67 : overbeek 1.11 elsif ($cgi->param('analyse_family')) {
68 : overbeek 1.12 # finding what is there is the same as findingh what is missing you just need one extra !
69 :     # these two things call the same method.
70 :     &analyse_family($fig,$cgi,$html, 0);
71 : redwards 1.7 }
72 : redwards 1.10 elsif ($cgi->param('reverse_analyse_family')) {
73 : overbeek 1.12 &analyse_family($fig,$cgi,$html, 1);
74 : redwards 1.10 }
75 : redwards 1.2 elsif ($cgi->param('family'))
76 :     {
77 :     &show_family($fig,$cgi,$html);
78 : redwards 1.1 }
79 :     elsif ($cgi->param('prot'))
80 :     {
81 : redwards 1.2 &show_protein($fig,$cgi,$html);
82 : redwards 1.1 }
83 :     else
84 :     {
85 :     &show_initial($fig,$cgi,$html);
86 :     }
87 :    
88 :     &HTML::show_page($cgi,$html,1);
89 :     exit;
90 :    
91 :    
92 :     sub show_initial {
93 :     my ($fig,$cgi,$html)=@_;
94 :     # generate a blank page
95 : redwards 1.2 push @$html,
96 :     "<h2>Protein Families</h2>\n",
97 :     "<p>Please enter a protein ID . You will recieve a list of all the families that protein is in. \n",
98 :     "You can use a FIG ID such as fig|83333.1.peg.3, or an ID from SwissProt, KEGG, NCBI, and others.</p>",
99 :     $cgi->start_form(),
100 :     "Please enter a protein id: ", $cgi->textfield(-name=>"prot", -size=>40), "<br>",
101 :     "<p>Alternately, you can enter a family. Please enter a family name in the format pir|PIRSF001547 or fig|PF002363.</p>",
102 :     "Please enter a family id: ", $cgi->textfield(-name=>"family", -size=>40), "<br>",
103 :     $cgi->submit, $cgi->reset, $cgi->end_form;
104 : redwards 1.1 return $html;
105 :     }
106 :    
107 : redwards 1.7
108 : redwards 1.2 sub show_family {
109 : redwards 1.1 my ($fig,$cgi,$html)=@_;
110 : redwards 1.2 foreach my $fam ($cgi->param('family')) {
111 : redwards 1.4 my @cids=sort {$a <=> $b} $fig->ids_in_family($fam);
112 : redwards 1.2 my $tab=[];
113 : redwards 1.3 my $col_hdrs=['Cluster ID', 'Polypeptides with same amino acid sequence'];
114 : redwards 1.4 foreach my $cid (@cids) {
115 :     my @pegs=$fig->cid_to_prots($cid);
116 : redwards 1.2 foreach my $p (@pegs) {
117 :     foreach my $k (keys %proteinbase) {
118 :     if ($p =~ /^$k/) {$p =~ s/^(.*?)\|//; $p = "<a href='$proteinbase{$k}$p'>$1|$p</a>"}
119 : redwards 1.1 }
120 :     }
121 : redwards 1.4 push @$tab, [$cid, (join ", ", (@pegs))];
122 : redwards 1.1 }
123 : redwards 1.2
124 :     push @$html, "<h2>$fam Family</h2>\n",
125 :     "<p>The family $fam has the function ", $fig->family_function($fam), ", and contains ", $fig->sz_family($fam), " proteins, as shown in the table below.<br>",
126 : redwards 1.3 "Each of the sequences with a given ID have the same amino acid sequence, and hence are the same polypeptide, ",
127 :     "even though they may come from different organisms.</p>",
128 : redwards 1.2 "<p>The links will take you to the respective databases for each of the other protein families.\n</p>",
129 :     $cgi->start_form,
130 :     &HTML::make_table($col_hdrs, $tab, "Proteins in " . $fig->family_function($fam) . " ($fam)"),
131 : redwards 1.4 $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>'family', -value=>"$fam"), $cgi->submit("Compare FIG Functions"),
132 : overbeek 1.12 $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),
133 :     $cgi->submit(-name=>'reverse_analyse_family', -value=>"Show Proteins that are NOT in family"),
134 : redwards 1.2 $cgi->end_form;
135 : redwards 1.1 }
136 :     }
137 :    
138 : overbeek 1.11 sub show_protein {
139 : redwards 1.1 my ($fig,$cgi,$html)=@_;
140 : overbeek 1.11 foreach my $peg ($cgi->param('prot')) {
141 :     my @families=$fig->families_for_protein($peg);
142 :     unless (@families)
143 :     {
144 :     push @$html, "<h2 style='color: red'>Sorry, $peg is not in any protein families</h2>";
145 :     return;
146 : redwards 1.1 }
147 :    
148 : redwards 1.2 my $tab=[];
149 : overbeek 1.11 my $self=$cgi->url;
150 :     foreach my $fam (@families) {
151 :     my %idcount;
152 :     my $noprots=scalar(map {$idcount{$_}=1} $fig->ids_in_family($fam));
153 :     #push @$tab, ["<a href='$self?family=$fam'>$fam</a>", $fig->family_function($fam), $fig->sz_family($fam), $cgi->checkbox(-name=>$fam, -label=>'')];
154 :     push @$tab, ["<a href='$self?family=$fam'>$fam</a>", $fig->family_function($fam), $noprots, $cgi->checkbox(-name=>$fam, -label=>'')];
155 : redwards 1.2 }
156 : overbeek 1.11
157 :     my $col_hdrs=['Family ID', 'Family Function', 'Number of IDs in Family', 'Choose Family'];
158 :     push @$html, "<h2>Families for $peg</h2>\n",
159 : redwards 1.2 $cgi->start_form,
160 : overbeek 1.11 "<p>$peg is in the following ", scalar(@families), " families. Please choose one or more families using the checkboxes</p>\n",
161 :     &HTML::make_table($col_hdrs, $tab, "Families for $peg"), "\n",
162 :     $cgi->submit('Show Proteins In Each Family'),
163 :     $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),
164 :     $cgi->submit(-name=>'reverse_analyse_family', -value=>"Show Proteins that are NOT in family"),
165 :     $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>"allfams", -value=>\@families), "\n",
166 :     $cgi->reset, $cgi->end_form;
167 : redwards 1.2 }
168 :     }
169 : redwards 1.10
170 :    
171 : overbeek 1.12
172 :    
173 : redwards 1.10 sub analyse_family {
174 : overbeek 1.12 my ($fig,$cgi,$html, $reverse)=@_;
175 : redwards 1.10 # here are the questions:
176 :     # 1. Given a column in a spreadsheet:
177 :     # 2. Here are the proteins in that column
178 :     # 3. For each protein, here are the families that they are in. How many families are unique and how many families is every protein in?
179 :     # if we start with a column of 10 proteins, and nine of them are all in the same families and one is not, we want to exclude the one and keep the nine.
180 :     # so we recommend that a protein be removed from a family.
181 :     # 4. For each of the families that are good, which proteins are there in some/most of the families that are not in the column that we are looking at
182 :     # 5. For each of the families that are good, which proteins are only in one of those families and not in any others?
183 :    
184 :     # Note that column == family, But start with fig and then allow a replace ID feature like before.
185 :    
186 : overbeek 1.12 my $focus=$cgi->param('focus'); # these are the things that we are interested in
187 :     unless ($focus) {$focus="fig"}
188 :    
189 :     my @cols;
190 :     if ($cgi->param("allfams")) {@cols=grep {$cgi->param($_)} $cgi->param("allfams")}
191 :     elsif ($cgi->param("family")) {push @cols, $cgi->param('family')}
192 :     else {die "No families declared!"}
193 :    
194 :     foreach my $col (@cols)
195 : redwards 1.10 {
196 :     # $col is the column in the spreadsheet. This is really a family, but to visualize and code this I am doing it in a FIG-centric way
197 : overbeek 1.12 my $proteins_in_col;
198 :     map {$proteins_in_col->{$_}=1} $fig->ids_in_family($col);
199 : redwards 1.10
200 :     # @proteins are the proteins in that column, although these are cids and not fids at the moment
201 :     my $familycount;
202 : overbeek 1.12 foreach my $prot (keys %$proteins_in_col) {
203 : redwards 1.10 foreach my $fam ($fig->in_family($prot)) {
204 :     $familycount->{$fam}++;
205 :     }
206 :     }
207 :    
208 :     my $count_of;
209 :     my $fams;
210 : overbeek 1.12 foreach my $f (sort {$familycount->{$b} <=> $familycount->{$a}} keys %$familycount)
211 :     {
212 :     if ($reverse) {($fams, $count_of)=&ids_missing_from_fam($fig, $f, $focus, $proteins_in_col, $fams, $count_of)}
213 :     else {($fams, $count_of)=&ids_are_in_fam($fig, $f, $focus, $proteins_in_col, $fams, $count_of)}
214 :     }
215 : redwards 1.10
216 :     # create a list of families that we know about
217 : overbeek 1.11 my @fams=grep {!/$col/} sort {scalar(keys %{$fams->{$b}}) <=> scalar(keys %{$fams->{$a}})} keys %$fams;
218 :     unshift @fams, $col;
219 :    
220 :     my $tab=[[["Number of proteins in family", "th colspan=3"], map {scalar(keys %{$fams->{$_}})} @fams]];
221 : redwards 1.10
222 : overbeek 1.11 my @fids;
223 :     if ($cgi->param('sort') eq "genome")
224 :     {
225 :     @fids=sort {$fig->genome_of($a) <=> $fig->genome_of($b)} keys %$count_of;
226 :     }
227 :     else
228 :     {
229 :     @fids=sort {scalar(keys %{$count_of->{$b}}) <=> scalar(keys %{$count_of->{$a}})} keys %$count_of;
230 :     }
231 :    
232 :     my $rowcount;
233 :     foreach my $fid (@fids)
234 :     {
235 :     my @row=(++$rowcount, $fid, scalar(keys %{$count_of->{$fid}}));
236 :    
237 :     foreach my $fam (@fams) {
238 :     $count_of->{$fid}->{$fam} ? push @row, ["Y", "td style='background-color: lightgrey; text-align: center'"] : push @row, " &nbsp ";
239 : redwards 1.10 }
240 :     push @$tab, \@row;
241 :     }
242 : overbeek 1.12
243 :     push @$html, $cgi->start_form(), "Limit the display to proteins from ", &choose_focus($cgi);
244 :     if ($reverse) {
245 :     push @$html, $cgi->p("These are proteins that ARE NOT in ", $fig->family_function($col), " ($col) but are in other families that have proteins in this family.");
246 :     } else {
247 :     push @$html, $cgi->p("These are proteins that ARE in ", $fig->family_function($col), " ($col) and are in other families that have proteins in this family.");
248 :     }
249 :    
250 :     push @$html,
251 :     $cgi->hidden(-name=>"family", -value=>@cols), $cgi->hidden("prot"), $cgi->hidden(-name=>"user"),
252 : overbeek 1.11 $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),
253 :     $cgi->submit(-name=>'reverse_analyse_family', -value=>"Show Proteins that are NOT in family"),
254 :     &HTML::make_table(["Count", "Protein ID", "Number of fams protein is in", @fams], $tab,' &nbsp; ');
255 : redwards 1.10 }
256 :     }
257 :    
258 :    
259 : overbeek 1.12 sub ids_are_in_fam {
260 :     my ($fig, $f, $focus, $proteins_in_col, $fams, $count_of)=@_;
261 :     # It seems that $sz_family is not right
262 :     map {$fams->{$f}->{$_}++; $count_of->{$_}->{$f}++}
263 :     grep {/^$focus/}
264 :     map {$fig->cid_to_prots($_)}
265 :     grep {$proteins_in_col->{$_}}
266 :     ($fig->ids_in_family($f));
267 :     return ($fams, $count_of);
268 :     }
269 : redwards 1.10
270 : overbeek 1.12 sub ids_missing_from_fam {
271 :     my ($fig, $f, $focus, $proteins_in_col, $fams, $count_of)=@_;
272 : redwards 1.10 # It seems that $sz_family is not right
273 : overbeek 1.11 map {$fams->{$f}->{$_}++; $count_of->{$_}->{$f}++}
274 : redwards 1.10 grep {/^$focus/}
275 :     map {$fig->cid_to_prots($_)}
276 : overbeek 1.12 grep {!$proteins_in_col->{$_}}
277 : redwards 1.10 ($fig->ids_in_family($f));
278 : overbeek 1.12 return ($fams, $count_of);
279 :     }
280 :    
281 : overbeek 1.11
282 : redwards 1.10
283 : overbeek 1.11 sub choose_focus {
284 :     my ($cgi)=@_;
285 :     my %choices=(
286 :     "fig" => "FIGfams",
287 :     "tigr" => "TIGRfams",
288 :     "pfam" => "PFAM",
289 :     "sp" => "SwissProt",
290 :     "kegg" => "KEGG",
291 :     "pir" => "PIR SuperFams",
292 :     "mcl" => "MCL",
293 :     "cog" => "COG",
294 :     );
295 :    
296 :     my $default = $cgi->param("focus"); unless ($default) {$default="fig"}
297 :    
298 :     return $cgi->popup_menu(
299 :     -name => "focus",
300 :     -values => [keys %choices],
301 :     -labels => \%choices,
302 :     -default => $default,
303 :     );
304 :     }
305 : redwards 1.10
306 : redwards 1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3