[Bio] / FigWebServices / proteinfamilies.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/proteinfamilies.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (view) (download)

1 : redwards 1.1 # -*- perl -*-
2 :    
3 :     =pod
4 :    
5 :     =head1 proteinfamilies.cgi
6 :    
7 : redwards 1.3 A base web interface for getting information about protein families in and out. Initially we are going to make a 3 (or 4) column table of protein, family and other proteins.
8 : redwards 1.1
9 :     =cut
10 :    
11 :     use strict;
12 :     use FIG;
13 :     use HTML;
14 :     use raelib;
15 :     use CGI;
16 :     my $cgi=new CGI;
17 :     use LWP::Simple qw(!head); # see the caveat in perldoc LWP about importing two head methods.
18 :    
19 :     my $fig;
20 :     eval {
21 :     $fig = new FIG;
22 :     };
23 :    
24 :     if ($@ ne "")
25 :     {
26 :     my $err = $@;
27 :    
28 :     my(@html);
29 :    
30 :     push(@html, $cgi->p("Error connecting to SEED database."));
31 :     if ($err =~ /Could not connect to DBI:.*could not connect to server/)
32 :     {
33 :     push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
34 :     }
35 :     else
36 :     {
37 :     push(@html, $cgi->pre($err));
38 :     }
39 :     &HTML::show_page($cgi, \@html, 1);
40 :     exit;
41 :     }
42 :    
43 :     my $html = [];
44 :     my $user = $cgi->param('user');
45 :    
46 :     unshift(@$html, "<TITLE>The SEED - Global Protein Families </TITLE>\n");
47 :    
48 : redwards 1.2 my %proteinbase=(
49 :     "fig" => "/FIG/protein.cgi?user=$user&prot=fig|",
50 :     "cog" => "http://www.ncbi.nlm.nih.gov/COG/old/palox.cgi?",
51 :     "sp" => "http://www.expasy.org/uniprot/",
52 :     "tr" => "http://www.expasy.org/uniprot/",
53 :     "kegg" => "http://www.genome.jp/dbget-bin/www_bget?",
54 :     );
55 :    
56 :    
57 :    
58 :     if ($cgi->param('Show Proteins In Each Family'))
59 : redwards 1.1 {
60 : redwards 1.2 my @needed=grep {$cgi->param($_)} $cgi->param("allfams");
61 :     $cgi->param(-name=>'family', -value=>\@needed);
62 :     &show_family($fig,$cgi,$html);
63 :     }
64 :     elsif ($cgi->param("Combine Families With And"))
65 :     {
66 :     &combine_families($fig,$cgi,$html);
67 :     }
68 :     elsif ($cgi->param('Compare FIG Functions'))
69 :     {
70 :     my @needed=grep {$cgi->param($_)} $cgi->param("allfams");
71 :     $cgi->param(-name=>'family', -value=>\@needed);
72 :     &compare_functions($fig,$cgi,$html);
73 :     }
74 : redwards 1.4 elsif ($cgi->param('Suggest')) {
75 :     &suggest_proteins($fig,$cgi,$html);
76 :     }
77 : redwards 1.2 elsif ($cgi->param('family'))
78 :     {
79 :     &show_family($fig,$cgi,$html);
80 : redwards 1.1 }
81 :     elsif ($cgi->param('prot'))
82 :     {
83 : redwards 1.2 &show_protein($fig,$cgi,$html);
84 : redwards 1.1 }
85 :     else
86 :     {
87 :     &show_initial($fig,$cgi,$html);
88 :     }
89 :    
90 :     &HTML::show_page($cgi,$html,1);
91 :     exit;
92 :    
93 :    
94 :     sub show_initial {
95 :     my ($fig,$cgi,$html)=@_;
96 :     # generate a blank page
97 : redwards 1.2 push @$html,
98 :     "<h2>Protein Families</h2>\n",
99 :     "<p>Please enter a protein ID . You will recieve a list of all the families that protein is in. \n",
100 :     "You can use a FIG ID such as fig|83333.1.peg.3, or an ID from SwissProt, KEGG, NCBI, and others.</p>",
101 :     $cgi->start_form(),
102 :     "Please enter a protein id: ", $cgi->textfield(-name=>"prot", -size=>40), "<br>",
103 :     "<p>Alternately, you can enter a family. Please enter a family name in the format pir|PIRSF001547 or fig|PF002363.</p>",
104 :     "Please enter a family id: ", $cgi->textfield(-name=>"family", -size=>40), "<br>",
105 :     $cgi->submit, $cgi->reset, $cgi->end_form;
106 : redwards 1.1 return $html;
107 :     }
108 :    
109 : redwards 1.2 sub show_protein {
110 : redwards 1.1 my ($fig,$cgi,$html)=@_;
111 : redwards 1.2 foreach my $peg ($cgi->param('prot')) {
112 :     my @families=$fig->families_for_protein($peg);
113 :     unless (@families)
114 :     {
115 :     push @$html, "<h2 style='color: red'>Sorry, $peg is not in any protein families</h2>";
116 :     return;
117 :     }
118 :    
119 :     my $tab=[];
120 :     my $self=$cgi->url;
121 :     foreach my $fam (@families) {
122 :     push @$tab, ["<a href='$self?family=$fam'>$fam</a>", $fig->family_function($fam), $fig->sz_family($fam), $cgi->checkbox(-name=>$fam, -label=>'')];
123 : redwards 1.1 }
124 : redwards 1.2
125 :     my $col_hdrs=['Family ID', 'Family Function', 'Number of Unique Proteins in Family', 'Choose Family'];
126 :     push @$html, "<h2>Families for $peg</h2>\n",
127 :     $cgi->start_form,
128 :     "<p>$peg is in the following ", scalar(@families), " families. Please choose one or more families using the checkboxes</p>\n",
129 :     &HTML::make_table($col_hdrs, $tab, "Families for $peg"), "\n",
130 : redwards 1.4 $cgi->submit('Show Proteins In Each Family'), $cgi->submit(-name=>"Suggest", -value=>"Suggest Proteins for this Family"),
131 :     $cgi->submit("Combine Families With And"), $cgi->submit("Compare FIG Functions"),
132 :     $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>"allfams", -value=>\@families), "\n",
133 : redwards 1.2 $cgi->reset, $cgi->end_form;
134 : redwards 1.1 }
135 :     }
136 : redwards 1.2
137 :     sub show_family {
138 : redwards 1.1 my ($fig,$cgi,$html)=@_;
139 : redwards 1.2 foreach my $fam ($cgi->param('family')) {
140 : redwards 1.4 my @cids=sort {$a <=> $b} $fig->ids_in_family($fam);
141 : redwards 1.2 my $tab=[];
142 : redwards 1.3 my $col_hdrs=['Cluster ID', 'Polypeptides with same amino acid sequence'];
143 : redwards 1.4 foreach my $cid (@cids) {
144 :     my @pegs=$fig->cid_to_prots($cid);
145 : redwards 1.2 foreach my $p (@pegs) {
146 :     foreach my $k (keys %proteinbase) {
147 :     if ($p =~ /^$k/) {$p =~ s/^(.*?)\|//; $p = "<a href='$proteinbase{$k}$p'>$1|$p</a>"}
148 : redwards 1.1 }
149 :     }
150 : redwards 1.4 push @$tab, [$cid, (join ", ", (@pegs))];
151 : redwards 1.1 }
152 : redwards 1.2
153 :     push @$html, "<h2>$fam Family</h2>\n",
154 :     "<p>The family $fam has the function ", $fig->family_function($fam), ", and contains ", $fig->sz_family($fam), " proteins, as shown in the table below.<br>",
155 : redwards 1.3 "Each of the sequences with a given ID have the same amino acid sequence, and hence are the same polypeptide, ",
156 :     "even though they may come from different organisms.</p>",
157 : redwards 1.2 "<p>The links will take you to the respective databases for each of the other protein families.\n</p>",
158 :     $cgi->start_form,
159 :     &HTML::make_table($col_hdrs, $tab, "Proteins in " . $fig->family_function($fam) . " ($fam)"),
160 : redwards 1.4 $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>'family', -value=>"$fam"), $cgi->submit("Compare FIG Functions"),
161 : redwards 1.2 $cgi->end_form;
162 : redwards 1.1 }
163 :     }
164 :    
165 : redwards 1.2 sub combine_families {
166 : redwards 1.1 my ($fig,$cgi,$html)=@_;
167 : redwards 1.4 # first find all the families and all the cids in those families
168 :     my $cids; my @families; my $allprots;
169 : redwards 1.2 foreach my $f (grep {$cgi->param($_)} $cgi->param("allfams"))
170 :     {
171 :     push @families, $f;
172 : redwards 1.4 foreach my $e ($fig->ids_in_family($f)) {$cids->{$e}->{$f}=1; $allprots++}
173 : redwards 1.1 }
174 : redwards 1.3
175 : redwards 1.4 # now figure out those cids that are in all families
176 : redwards 1.2 # we are going to do this with a boolean switch rather than just counting occurences
177 :     my @wanted;
178 : redwards 1.4 foreach my $cid (keys %$cids) {
179 : redwards 1.2 my $keep=1;
180 : redwards 1.4 foreach my $f (@families) {undef $keep unless ($cids->{$cid}->{$f})}
181 :     push @wanted, $cid if ($keep);
182 : redwards 1.2 }
183 :    
184 :     my $tab=[];
185 : redwards 1.3 my $col_hdrs=['Cluster ID', 'Polypeptides with same amino acid sequence'];
186 : redwards 1.4 foreach my $cid (sort {$a <=> $b} @wanted) {
187 :     my @pegs=$fig->cid_to_prots($cid);
188 : redwards 1.2 foreach my $p (@pegs) {
189 :     foreach my $k (keys %proteinbase) {
190 :     if ($p =~ /^$k/) {$p =~ s/^(.*?)\|//; $p = "<a href='$proteinbase{$k}$p'>$1|$p</a>"}
191 :     }
192 : redwards 1.1 }
193 : redwards 1.4 push @$tab, [$cid, (join ", ", (@pegs))];
194 : redwards 1.1 }
195 : redwards 1.2
196 :    
197 :     push @$html, "<h2>Proteins in ", (join ", ", (@families)), " families</h2>\n",
198 : redwards 1.3 "<h3>Summary</h3>\n<p>There were $allprots proteins in the ", scalar(@families), " families that you selected.\n<br>",
199 : redwards 1.4 "Out of a those proteins, there were ", scalar(keys %$cids),
200 : redwards 1.3 " unique proteins, and ", scalar(@wanted), " proteins are present in the ",
201 :     scalar(@families), " families.</p>\n",
202 :     "Each of the sequences with a given ID have the same amino acid sequence, and hence are the same polypeptide, even though they may come from different organisms.</p>",
203 : redwards 1.2 "<p>The links will take you to the respective databases for each of the other protein families.\n</p>",
204 :     $cgi->start_form,
205 :     &HTML::make_table($col_hdrs, $tab, "Proteins in " . (join ", ", (@families)) . " families"),
206 :     $cgi->end_form;
207 : redwards 1.1 }
208 :    
209 : redwards 1.2 sub compare_functions {
210 :     my ($fig,$cgi,$html)=@_;
211 :     foreach my $fam ($cgi->param('family')) {
212 : redwards 1.4 my @cids=sort {$a <=> $b} $fig->ids_in_family($fam);
213 : redwards 1.2 my $tab=[];
214 :     my $col_hdrs=['FIG ID', 'Genome', 'Assigned Function'];
215 : redwards 1.4 foreach my $cid (@cids) {
216 :     foreach my $peg (grep {/^fig/} $fig->cid_to_prots($cid)) {
217 : redwards 1.2 my $p=$peg;
218 :     $p =~ s/fig\|//;
219 :     $p = "<a href='$proteinbase{'fig'}$p'>$peg</a>";
220 :     push @$tab, [$p, ($fig->genus_species($fig->genome_of($peg))), scalar($fig->function_of($peg))];
221 :     }
222 :     }
223 :    
224 :     push @$html, "<h2>Functions in $fam Family</h2>\n",
225 :     "<p>The family $fam has the function ", $fig->family_function($fam), ".\n",
226 :     "The functions shown here are the functions of just the proteins in the SEED database from this family</p>",
227 :     $cgi->start_form,
228 :     &HTML::make_table($col_hdrs, $tab, "Proteins in " . $fig->family_function($fam) . " ($fam)"),
229 : redwards 1.4 $cgi->hidden(-name=>'prot'), $cgi->hidden(-name=>'family', -value=>"$fam"), $cgi->submit("Compare FIG Functions"),
230 : redwards 1.2 $cgi->end_form;
231 :     }
232 :     }
233 : redwards 1.1
234 : redwards 1.4 sub suggest_proteins {
235 :     my ($fig,$cgi,$html)=@_;
236 :     my $prot=$cgi->param('prot');
237 :     my @families=$fig->families_for_protein($prot);
238 :     my $knownfamily;
239 :     map {$knownfamily->{$_}=1} @families;
240 :    
241 :     print STDERR "Looking through ", scalar(@families), " families\n";
242 :     my $famcount=1;my $time=time;
243 :     my $content;
244 :     foreach my $fam (@families) {
245 :     print STDERR "$famcount in ", time-$time, "\n"; $time=time; $famcount++;
246 :    
247 :     my @newprots=$fig->proteins_in_family($fam);
248 :     print STDERR "$fam has ", scalar(@newprots), "\n";
249 :     foreach my $p (@newprots) {
250 :     my @newfamilies=$fig->families_for_protein($p);
251 :     foreach my $nf (@newfamilies) {
252 :     next if ($knownfamily->{$nf});
253 :     push @{$content->{$nf}}, $p;
254 :     }
255 :     }
256 :     }
257 :    
258 :     my $tab=[];
259 :     @$tab=map {[$_, $fig->family_function($_), scalar(@{$content->{$_}})]} keys %$content;
260 :     my $col_hdrs=["Family ID", "Family function", "Nummber of proteins in family"];
261 :    
262 :     push @$html, $cgi->p("$prot is in families ", join ",", @families),
263 :     $cgi->p("These are the other families that $prot should also appear in:"),
264 :     &HTML::make_table($col_hdrs, $tab, "Other Families"), "\n";
265 :     }
266 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3