[Bio] / FigWebServices / proteinfamilies.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/proteinfamilies.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.27 - (view) (download)

1 : redwards 1.1 # -*- perl -*-
2 :    
3 :     =pod
4 :    
5 :     =head1 proteinfamilies.cgi
6 :    
7 : redwards 1.3 A base web interface for getting information about protein families in and out. Initially we are going to make a 3 (or 4) column table of protein, family and other proteins.
8 : redwards 1.1
9 : redwards 1.7 PLEASE NOTE: Do not attempt to read or understand this code. Please leave now. It is a complete mess because it is very experimental and we are trying stuff out. none of it will work. The exit is this way ---->
10 :    
11 : redwards 1.1 =cut
12 :    
13 :     use strict;
14 :     use FIG;
15 : overbeek 1.26 use FIGjs;
16 : redwards 1.1 use HTML;
17 :     use raelib;
18 :     use CGI;
19 : overbeek 1.11 use CGI::Carp qw(fatalsToBrowser);
20 : redwards 1.1 my $cgi=new CGI;
21 :     use LWP::Simple qw(!head); # see the caveat in perldoc LWP about importing two head methods.
22 :    
23 :     my $fig;
24 :     eval {
25 :     $fig = new FIG;
26 :     };
27 :    
28 :     if ($@ ne "")
29 :     {
30 :     my $err = $@;
31 :    
32 :     my(@html);
33 :    
34 :     push(@html, $cgi->p("Error connecting to SEED database."));
35 :     if ($err =~ /Could not connect to DBI:.*could not connect to server/)
36 :     {
37 :     push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
38 :     }
39 :     else
40 :     {
41 :     push(@html, $cgi->pre($err));
42 :     }
43 :     &HTML::show_page($cgi, \@html, 1);
44 :     exit;
45 :     }
46 :    
47 :     my $html = [];
48 :     my $user = $cgi->param('user');
49 :    
50 :     unshift(@$html, "<TITLE>The SEED - Global Protein Families </TITLE>\n");
51 :    
52 : redwards 1.2
53 : overbeek 1.27 if ($cgi->param('equivalence'))
54 : overbeek 1.17 {
55 :     &set_of_equivs($fig,$cgi,$html);
56 :     }
57 :     elsif ($cgi->param('differentiate'))
58 :     {
59 : overbeek 1.22 &set_of_equivs($fig,$cgi,$html);
60 : overbeek 1.17 &differentiate($fig,$cgi,$html);
61 :     }
62 : redwards 1.2 elsif ($cgi->param('family'))
63 :     {
64 :     &show_family($fig,$cgi,$html);
65 : redwards 1.1 }
66 :     elsif ($cgi->param('prot'))
67 :     {
68 : redwards 1.2 &show_protein($fig,$cgi,$html);
69 : redwards 1.1 }
70 : overbeek 1.14 elsif ($cgi->param('fig2kegg'))
71 :     {
72 :     &comparefig2kegg($fig,$cgi,$html);
73 :     }
74 : redwards 1.1 else
75 :     {
76 :     &show_initial($fig,$cgi,$html);
77 :     }
78 :    
79 :     &HTML::show_page($cgi,$html,1);
80 :     exit;
81 :    
82 :    
83 :     sub show_initial {
84 :     my ($fig,$cgi,$html)=@_;
85 :     # generate a blank page
86 : redwards 1.2 push @$html,
87 :     "<h2>Protein Families</h2>\n",
88 :     "<p>Please enter a protein ID . You will recieve a list of all the families that protein is in. \n",
89 : overbeek 1.27 "You can use a FIG ID such as fig|83333.1.peg.3, or an ID from SwissProt, KEGG, NCBI, and others.</p>\n",
90 :     $cgi->start_form(-method=>'get'), "\n",
91 :     "Please enter a protein id: ", $cgi->textfield(-name=>"prot", -size=>40), "<br>\n",
92 :     $cgi->submit(-name=>'equivalence', -value=>"Show Protein Families"), "\n", $cgi->reset, $cgi->end_form;
93 : redwards 1.1 return $html;
94 :     }
95 :    
96 : overbeek 1.22 # "<p>Alternately, you can enter a family. Please enter a family name in the format pir|PIRSF001547 or fig|PF002363.</p>",
97 :     # "Please enter a family id: ", $cgi->textfield(-name=>"family", -size=>40), "<br>",
98 :     # $cgi->submit, $cgi->reset, $cgi->end_form;
99 : redwards 1.7
100 : redwards 1.2 sub show_family {
101 : redwards 1.1 my ($fig,$cgi,$html)=@_;
102 : redwards 1.2 foreach my $fam ($cgi->param('family')) {
103 :     my $tab=[];
104 : overbeek 1.27 my $col_hdrs=['#', 'Protein', 'Other proteins with same amino acid sequence'];
105 :     my $count=1;
106 :    
107 :     foreach my $extid (sort {$a cmp $b} $fig->ext_ids_in_family($fam))
108 :     {
109 :     my $cid=$fig->prot_to_cid($extid);
110 :     my @pegs=map {&protein_link($_)} grep {$_ ne $extid} $fig->cid_to_prots($cid);
111 :     push @$tab, [$count, &protein_link($extid, 1), (join ", ", (@pegs))];
112 :     $count++;
113 : redwards 1.1 }
114 : redwards 1.2
115 :     push @$html, "<h2>$fam Family</h2>\n",
116 :     "<p>The family $fam has the function ", $fig->family_function($fam), ", and contains ", $fig->sz_family($fam), " proteins, as shown in the table below.<br>",
117 : overbeek 1.27 "Each of these proteins are present in other databases, and this table shows you the ID of the identical proteins in those databases</p>\n",
118 : redwards 1.2 "<p>The links will take you to the respective databases for each of the other protein families.\n</p>",
119 :     &HTML::make_table($col_hdrs, $tab, "Proteins in " . $fig->family_function($fam) . " ($fam)"),
120 : redwards 1.1 }
121 :     }
122 :    
123 : overbeek 1.11 sub show_protein {
124 : redwards 1.1 my ($fig,$cgi,$html)=@_;
125 : overbeek 1.11 foreach my $peg ($cgi->param('prot')) {
126 : overbeek 1.22 my @families;
127 :     if ($peg =~ /^\d+$/)
128 :     {
129 :     # it is a cid
130 :     @families=$fig->in_family($peg);
131 :     $peg = "CID $peg";
132 :     }
133 :     else
134 :     {
135 :     @families=$fig->families_for_protein($peg);
136 :     }
137 :    
138 : overbeek 1.11 unless (@families)
139 :     {
140 :     push @$html, "<h2 style='color: red'>Sorry, $peg is not in any protein families</h2>";
141 :     return;
142 : redwards 1.1 }
143 :    
144 : redwards 1.2 my $tab=[];
145 : overbeek 1.20 my $self=$cgi->url(-relative => 1);
146 : overbeek 1.11 foreach my $fam (@families) {
147 :     my %idcount;
148 :     my $noprots=scalar(map {$idcount{$_}=1} $fig->ids_in_family($fam));
149 :     #push @$tab, ["<a href='$self?family=$fam'>$fam</a>", $fig->family_function($fam), $fig->sz_family($fam), $cgi->checkbox(-name=>$fam, -label=>'')];
150 :     push @$tab, ["<a href='$self?family=$fam'>$fam</a>", $fig->family_function($fam), $noprots, $cgi->checkbox(-name=>$fam, -label=>'')];
151 : redwards 1.2 }
152 : overbeek 1.11
153 : overbeek 1.17 my $col_hdrs=['Family ID', 'Family Function', 'Number of CIDs in Family', 'Choose Family'];
154 : overbeek 1.11 push @$html, "<h2>Families for $peg</h2>\n",
155 : overbeek 1.17 $cgi->start_form(-method=>'get'),
156 : overbeek 1.11 "<p>$peg is in the following ", scalar(@families), " families. Please choose one or more families using the checkboxes</p>\n",
157 : overbeek 1.17 "A CID is a unique, internal ID we have assigned to proteins with identical sequences",
158 : overbeek 1.11 &HTML::make_table($col_hdrs, $tab, "Families for $peg"), "\n",
159 :     $cgi->submit('Show Proteins In Each Family'),
160 :     $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),
161 :     $cgi->submit(-name=>'reverse_analyse_family', -value=>"Show Proteins that are NOT in family"),
162 :     $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>"allfams", -value=>\@families), "\n",
163 :     $cgi->reset, $cgi->end_form;
164 : redwards 1.2 }
165 :     }
166 : redwards 1.10
167 :    
168 : overbeek 1.12
169 :    
170 : redwards 1.10 sub analyse_family {
171 : overbeek 1.12 my ($fig,$cgi,$html, $reverse)=@_;
172 : redwards 1.10 # here are the questions:
173 :     # 1. Given a column in a spreadsheet:
174 :     # 2. Here are the proteins in that column
175 :     # 3. For each protein, here are the families that they are in. How many families are unique and how many families is every protein in?
176 :     # if we start with a column of 10 proteins, and nine of them are all in the same families and one is not, we want to exclude the one and keep the nine.
177 :     # so we recommend that a protein be removed from a family.
178 :     # 4. For each of the families that are good, which proteins are there in some/most of the families that are not in the column that we are looking at
179 :     # 5. For each of the families that are good, which proteins are only in one of those families and not in any others?
180 :    
181 :     # Note that column == family, But start with fig and then allow a replace ID feature like before.
182 :    
183 : overbeek 1.15 my $focus=$cgi->param('focus') or 'all'; # these are the things that we are interested in
184 :     undef $focus if ($focus eq "all");
185 : overbeek 1.12
186 :     my @cols;
187 :     if ($cgi->param("allfams")) {@cols=grep {$cgi->param($_)} $cgi->param("allfams")}
188 :     elsif ($cgi->param("family")) {push @cols, $cgi->param('family')}
189 :     else {die "No families declared!"}
190 :    
191 :     foreach my $col (@cols)
192 : redwards 1.10 {
193 :     # $col is the column in the spreadsheet. This is really a family, but to visualize and code this I am doing it in a FIG-centric way
194 : overbeek 1.12 my $proteins_in_col;
195 :     map {$proteins_in_col->{$_}=1} $fig->ids_in_family($col);
196 : redwards 1.10
197 :     # @proteins are the proteins in that column, although these are cids and not fids at the moment
198 :     my $familycount;
199 : overbeek 1.12 foreach my $prot (keys %$proteins_in_col) {
200 : redwards 1.10 foreach my $fam ($fig->in_family($prot)) {
201 :     $familycount->{$fam}++;
202 :     }
203 :     }
204 :    
205 :     my $count_of;
206 :     my $fams;
207 : overbeek 1.12 foreach my $f (sort {$familycount->{$b} <=> $familycount->{$a}} keys %$familycount)
208 :     {
209 :     if ($reverse) {($fams, $count_of)=&ids_missing_from_fam($fig, $f, $focus, $proteins_in_col, $fams, $count_of)}
210 :     else {($fams, $count_of)=&ids_are_in_fam($fig, $f, $focus, $proteins_in_col, $fams, $count_of)}
211 :     }
212 : redwards 1.10
213 :     # create a list of families that we know about
214 : overbeek 1.27 my @fams=grep {$_ ne $col} sort {scalar(keys %{$fams->{$b}}) <=> scalar(keys %{$fams->{$a}})} keys %$fams;
215 : overbeek 1.11 unshift @fams, $col;
216 :    
217 : overbeek 1.15 my $tab=[[["Number of proteins in family", "th colspan=4"], map {scalar(keys %{$fams->{$_}})} @fams]];
218 : redwards 1.10
219 : overbeek 1.11 my @fids;
220 :     if ($cgi->param('sort') eq "genome")
221 :     {
222 :     @fids=sort {$fig->genome_of($a) <=> $fig->genome_of($b)} keys %$count_of;
223 :     }
224 : overbeek 1.15 elsif ($cgi->param('sort') eq "cid")
225 :     {
226 :     @fids=sort {$fig->prot_to_cid($a) <=> $fig->prot_to_cid($b)} keys %$count_of;
227 :     }
228 : overbeek 1.11 else
229 :     {
230 :     @fids=sort {scalar(keys %{$count_of->{$b}}) <=> scalar(keys %{$count_of->{$a}})} keys %$count_of;
231 :     }
232 :    
233 :     my $rowcount;
234 :     foreach my $fid (@fids)
235 :     {
236 : overbeek 1.15 my @row=(++$rowcount, $fig->prot_to_cid($fid), $fid, scalar(keys %{$count_of->{$fid}}));
237 : overbeek 1.11
238 :     foreach my $fam (@fams) {
239 :     $count_of->{$fid}->{$fam} ? push @row, ["Y", "td style='background-color: lightgrey; text-align: center'"] : push @row, " &nbsp ";
240 : redwards 1.10 }
241 :     push @$tab, \@row;
242 :     }
243 : overbeek 1.12
244 : overbeek 1.22 push @$html, $cgi->start_form(-method=>'get'), $cgi->p("Limit display to proteins from ", &choose_focus($cgi), "\n"), $cgi->p("Sort the order by ", &choose_sort($cgi),"\n");
245 : overbeek 1.12 if ($reverse) {
246 :     push @$html, $cgi->p("These are proteins that ARE NOT in ", $fig->family_function($col), " ($col) but are in other families that have proteins in this family.");
247 :     } else {
248 :     push @$html, $cgi->p("These are proteins that ARE in ", $fig->family_function($col), " ($col) and are in other families that have proteins in this family.");
249 :     }
250 :    
251 : redwards 1.16 # merge cells in the table
252 :     my $skip;
253 :     map {$skip->{$_}=1} (0, 2 .. 40); # ignore everything except column 2
254 :     $tab=&HTML::merge_table_rows($tab, $skip);
255 :    
256 : overbeek 1.12 push @$html,
257 :     $cgi->hidden(-name=>"family", -value=>@cols), $cgi->hidden("prot"), $cgi->hidden(-name=>"user"),
258 : overbeek 1.11 $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),
259 :     $cgi->submit(-name=>'reverse_analyse_family', -value=>"Show Proteins that are NOT in family"),
260 : overbeek 1.15 &HTML::make_table(["Count", "Unique ID", "Protein ID", "Number of fams protein is in", @fams], $tab,' &nbsp; ');
261 : redwards 1.10 }
262 :     }
263 :    
264 :    
265 : overbeek 1.12 sub ids_are_in_fam {
266 :     my ($fig, $f, $focus, $proteins_in_col, $fams, $count_of)=@_;
267 :     # It seems that $sz_family is not right
268 :     map {$fams->{$f}->{$_}++; $count_of->{$_}->{$f}++}
269 :     grep {/^$focus/}
270 :     map {$fig->cid_to_prots($_)}
271 :     grep {$proteins_in_col->{$_}}
272 :     ($fig->ids_in_family($f));
273 :     return ($fams, $count_of);
274 :     }
275 : redwards 1.10
276 : overbeek 1.12 sub ids_missing_from_fam {
277 :     my ($fig, $f, $focus, $proteins_in_col, $fams, $count_of)=@_;
278 : redwards 1.10 # It seems that $sz_family is not right
279 : overbeek 1.11 map {$fams->{$f}->{$_}++; $count_of->{$_}->{$f}++}
280 : redwards 1.10 grep {/^$focus/}
281 :     map {$fig->cid_to_prots($_)}
282 : overbeek 1.12 grep {!$proteins_in_col->{$_}}
283 : redwards 1.10 ($fig->ids_in_family($f));
284 : overbeek 1.12 return ($fams, $count_of);
285 :     }
286 :    
287 : overbeek 1.11
288 : redwards 1.10
289 : overbeek 1.11 sub choose_focus {
290 :     my ($cgi)=@_;
291 :     my %choices=(
292 : overbeek 1.15 "all" => "All",
293 : overbeek 1.11 "fig" => "FIGfams",
294 :     "tigr" => "TIGRfams",
295 :     "pfam" => "PFAM",
296 :     "sp" => "SwissProt",
297 :     "kegg" => "KEGG",
298 :     "pir" => "PIR SuperFams",
299 :     "mcl" => "MCL",
300 :     "cog" => "COG",
301 :     );
302 :    
303 : overbeek 1.22 my $default = $cgi->param("focus"); unless ($default) {$default="fig"}
304 : overbeek 1.11
305 :     return $cgi->popup_menu(
306 :     -name => "focus",
307 : overbeek 1.15 -values => [sort {$choices{$a} cmp $choices{$b}} keys %choices],
308 :     -labels => \%choices,
309 :     -default => $default,
310 :     );
311 :     }
312 :    
313 :     sub choose_sort {
314 :     my ($cgi)=@_;
315 :     my %choices=(
316 :     "none" => "Number of Proteins",
317 :     "genome" => "Genome",
318 :     "cid" => "Unique ID",
319 :     );
320 :    
321 :     my $default = $cgi->param("sort"); unless ($default) {$default="none"}
322 :    
323 :     return $cgi->popup_menu(
324 :     -name => "sort",
325 :     -values => [sort {$choices{$a} cmp $choices{$b}} keys %choices],
326 : overbeek 1.11 -labels => \%choices,
327 :     -default => $default,
328 :     );
329 :     }
330 : redwards 1.10
331 : redwards 1.7
332 : overbeek 1.14 sub comparefig2kegg {
333 :     my ($fig,$cgi,$html)=@_;
334 :    
335 :     my $classification; my %subsystem;
336 :     # read classification from kegg file
337 :     if (open(IN, "$FIG_Config::global/ProteinFamilies/kegg_classificaation.txt")) {
338 :     while (<IN>) {
339 :     chomp;
340 :     my @a=split /\t/;
341 :     my $id=shift(@a);
342 :     $subsystem{"kegg|$id"}=pop(@a);
343 :     push @{$classification->{"kegg|$id"}}, \@a;
344 :     }
345 :     }
346 :    
347 :    
348 :     my $tab=[];
349 :     # find out what families our proteins are in
350 :     map {
351 :     my $prot=$_;
352 :     map {
353 :     my $fam=$_;
354 :     if ($fam =~ /^fig/) {
355 :     my %ss;
356 :     map {$ss{$_->[0]}++} ($fig->subsystems_for_role($fig->family_function($fam)));
357 :     map {my $ss=$_; push @$tab, [$prot, @{$fig->subsystem_classification($ss)}, $ss, $fam, $fig->family_function($fam)]} keys %ss;
358 :     }
359 :     else {
360 :     map {push @$tab, [$prot, @{$_}, $subsystem{$fam}, $fam, $fig->family_function($fam)]} @{$classification->{$fam}}
361 :     }
362 :     } grep {/^fig/ || /^kegg/} $fig->families_for_protein($prot);
363 :     } $cgi->param('proteins');
364 :    
365 :     my $col_hdrs=['Protein', ['Classification', 'th colspan=2'], 'Subsystem', 'Family ID', 'Family Function'];
366 :     push @$html, &HTML::make_table($col_hdrs, $tab, "Families"), "\n",
367 :     }
368 : overbeek 1.17
369 :     ## Based on request from Ross:
370 :     # Subject: Re: fig.pl
371 :     # Date: October 4, 2005 6:21:00 AM PDT
372 :     # From: Ross@theFIG.info
373 :     # To: raedwards@gmail.com
374 :     #
375 :     #Rob,
376 :     #
377 :     #It seems to me that you got that right, and the function is certainly at the
378 :     #core of what is needed. I have been thinking about what I would want with
379 :     #protein families,
380 :     #and it goes something like this:
381 :     #
382 :     #1. Given a protein FIG1, you can get the set of proteins with the same CID
383 :     #(call it CID1). Call this set EQUIV, since it is really a set of IDs that are
384 :     #equivalent.
385 :     #
386 :     #2. From the set of IDs in EQUIV, you can get the set of protein families (from
387 :     #all sources) that contain the IDs in EQUIV. This gives a table
388 :     #
389 :     #
390 :     # [,ID,Function,Family,FamilyFunction]
391 :     #
392 :     # All of the table entries describe a family containing CID1.
393 :     #
394 :     #3. From this table you select two Families to be compared (e.g., one KEGG
395 :     #family vs a FIG family). This ends the first part -- selecting the precise
396 :     #two
397 :     # families to be compared. Each of the two families should be thought of
398 :     #as [CID,ID,Family].
399 :     #
400 :     #4. The comparison of SET1 and SET2 uses essentially the function you
401 :     #implemented. You need to form three sets:
402 :     #
403 :     # the intersection of SET1 and SET2
404 :     # SET1 - SET2
405 :     # SET2 - SET1
406 :     #
407 :     # You may or may not wish to display each of the three sets. The user
408 :     #should be able to select which. When you think
409 :     # of one of these sets, it is useful to think of
410 :     #{CID,Family,Set-of-CIDs}. That is, it is not just a set of CIDs; it should be
411 :     #viewed as a
412 :     # set of CIDs from a specific family that was chosen because it
413 :     #contained a specific CID.
414 :     #
415 :     #5. When displaying a set of proteins from a given family, you start with
416 :     #(CID,Family,Set-of-CIDs). Each line should contain
417 :     #
418 :     # 1. A single CID from the Set-of-CIDs (call this CID2).
419 :     #
420 :     # 2. A count of the number of sources that place both CID1 and CID2
421 :     #in the same family (note that this is not a count of the families that include
422 :     #both CID1 and CID2)
423 :     #
424 :     # 3. For each source a "Y" or space indicating whether or not the
425 :     #source placed CID1 and CID2 into the same family (i.e., whether or not there
426 :     # is at least one family from the source that contains both
427 :     #CID1 and CID2).
428 :     #
429 :     #That is what I think should be done. Can we discuss it?
430 :     #
431 :    
432 :    
433 :    
434 :     sub set_of_equivs {
435 : overbeek 1.22 ($fig, $cgi, $html)=@_;
436 : overbeek 1.27 my $peg=$cgi->param('prot');
437 :     my $tab=[];
438 :     my $allfams;
439 : overbeek 1.19
440 : overbeek 1.27 #begin the html so we can add hidden things
441 :     my $genusspecies=$fig->genus_species($fig->genome_of($peg));
442 :     push @$html, (
443 :     $cgi->start_form(-method=>'get'), "\n",
444 :     $cgi->p("<h1>Protein <b>$peg</b>: $genusspecies</h1>"), "\n",
445 :     $cgi->hidden(-name=>'querycid', -value=>$fig->prot_to_cid($peg)), "\n",
446 :     $cgi->a({class=>"help", onMouseover=>"javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this, 'Help', 'The table below shows all the families that contain a protein with the same sequence as <b>$peg</b>, although each family uses different IDs. The number of different IDs in each protein family and the number of unique protein sequences in each family are also shown. The latter is often less than the former since many databases contain identical proteins with different identifiers (for example the same protein from different <i>$genusspecies</i> genome sequences).', ''); this.tooltip.addHandler(); return false;", href=>"Html/ProteinFamilies.html"}, "Help"),
447 :     );
448 :    
449 :     foreach my $fam ($fig->families_for_protein($peg))
450 :     {
451 :     my $ffunc=$fig->family_function($fam) || " &nbsp; ";
452 :     push @$tab, [
453 :     &protein_link($peg),
454 :     scalar($fig->function_of($peg)),
455 :     "<a href=\'proteinfamilies.cgi?family=$fam&user=$user\'>$fam</a>",
456 :     $ffunc,
457 :     $fig->ext_sz_family($fam),
458 :     $fig->sz_family($fam),
459 :     ];
460 :     $allfams->{$fam}="$fam :" . $fig->family_function($fam);
461 :     }
462 : overbeek 1.19
463 : overbeek 1.17
464 :     $tab=&HTML::merge_table_rows($tab, {3=>1, 4=>1});
465 : overbeek 1.27
466 : overbeek 1.22 my $radbut={
467 :     "1not2"=>"In family one and NOT family two\n",
468 :     "2not1"=>"In family two and NOT family one\n",
469 : overbeek 1.27 "1and2"=>"In both families (intersection)\n",
470 :     "1or2" =>"In either family (union)\n",
471 : overbeek 1.22 };
472 : overbeek 1.27
473 :     # sort the list of families in this table but put the fig families at the beginning of the list
474 :     my @familylist=sort {$a cmp $b} grep {$_ !~ /^fig/} keys %$allfams;
475 :     unshift @familylist, sort {$a cmp $b} grep {$_ =~ /^fig/} keys %$allfams;
476 :    
477 :     my $col_hdrs=['Protein', 'Function', 'Family', 'Family Function', 'External<br>IDs In<br>Family', 'Unique<br>Proteins<br>In Family'];
478 : overbeek 1.21 push @$html, &HTML::make_table($col_hdrs, $tab, ""), "\n", $cgi->p("To differentiate families in this table, please choose two families:"),
479 : overbeek 1.27 "Family 1: &nbsp; ", $cgi->popup_menu(-name=>"family1", -values=>\@familylist, -labels=>$allfams, -default=>$familylist[0]),
480 :     " <br /> Family 2: &nbsp; ", $cgi->popup_menu(-name=>"family2", -values=>\@familylist, -labels=>$allfams, -default=>$familylist[1]),
481 : overbeek 1.18 $cgi->p("Show proteins:<br /><ul>\n",
482 : overbeek 1.27 $cgi->radio_group(-name=>"diff", -values=>[keys %$radbut], -labels=>$radbut, -rows=>4),
483 : overbeek 1.22 "</ul>\n"),
484 :     $cgi->hidden(-name=>'prot', -value=>$peg),
485 : overbeek 1.27 $cgi->submit(-name=>"differentiate", -value=>"Compare these families"), $cgi->reset, $cgi->end_form();
486 : overbeek 1.17 }
487 :    
488 : overbeek 1.27 # initially I added this option in, with appropriate help text, but then I added the union option, and I think that surplants this, so I removed it!
489 :     # $cgi->a({class=>"help", onMouseover=>"javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this, 'Help', 'The table will show only those proteins from one of the families that contains proteins that could be in the other family but are not. Most likely this will only be a few proteins, but the table above shows that there are a large number of proteins in one family but not another. Many of these are proteins that are missing from the database. Checking this box will show all the proteins from the chosen families, not just those with missing proteins. By default, you should not check this box.', ''); this.tooltip.addHandler(); return false;", href=>"Html/ProteinFamilies.html"}, "Help"),
490 :     # "Show all correspondences, not just those with missing proteins: ", $cgi->checkbox(-name=>"show", -value=>"all", -label=>""),
491 :    
492 : overbeek 1.22
493 : overbeek 1.17 sub differentiate {
494 : overbeek 1.22 ($fig, $cgi, $html)=@_;
495 :    
496 :     #my $focus=$cgi->param('focus') or 'all'; # these are the things that we are interested in
497 :     #undef $focus if ($focus eq "all");
498 :     my $focus=$cgi->param('family2');
499 :     $focus =~ s/\|.*//;
500 :    
501 :     my ($fam_id1, $fam_id2)=($cgi->param('family1'), $cgi->param('family2'));
502 :     if ($fam_id1 eq $fam_id2)
503 :     {
504 :     push @$html, "<h2 style='color: red'>Please choose two different protein families</h2>";
505 :     return;
506 :     }
507 :     my ($fam1, $fam2)=([$fig->ids_in_family($fam_id1)], [$fig->ids_in_family($fam_id2)]);
508 :    
509 : overbeek 1.27 # figure out our families
510 :     my $peg=$cgi->param('prot');
511 :     my @families=sort {$a cmp $b} grep {$_ ne $fam_id1} grep {$_ ne $fam_id2} $fig->families_for_protein($peg);
512 : overbeek 1.22 unshift @families, ($fam_id1, $fam_id2);
513 :     my @source=@families;
514 :     map {/^(.*?)\|/; $_=$1} @source;
515 :    
516 :     # now figure out all the external IDs in those families
517 :     my $extids;
518 : overbeek 1.27 foreach my $fam (@families)
519 : overbeek 1.22 {
520 : overbeek 1.27 map {$extids->{$_}->{$fam}=1} $fig->ext_ids_in_family($fam);
521 :     }
522 : overbeek 1.22
523 :     # finally generate the table. Note that there are three different arrays that we operate on depending on the user input
524 :     # but it really only changes which set algorith we use. Each array is handled identically.
525 :     my $tab;
526 : overbeek 1.27
527 :     # This block of code is the one that really decides what the data is that is analyzed.
528 :     # $fam1 and $fam2 are references to arrays containing all the CIDs in each family. We combine them with intersection, union, or differences
529 :     # and then process the remaining IDs.
530 :    
531 :     my $set=[];
532 :     if ($cgi->param("diff") eq "1and2")
533 :     {
534 :     $set=&set_utilities::intersection($fam1, $fam2);
535 :     }
536 :     elsif ($cgi->param("diff") eq "1not2")
537 :     {
538 :     $set=&set_utilities::set_diff($fam1, $fam2);
539 :     }
540 :     elsif ($cgi->param("diff") eq "2not1")
541 :     {
542 :     $set=&set_utilities::set_diff($fam2, $fam1);
543 :     }
544 :     elsif ($cgi->param("diff") eq "1or2")
545 : overbeek 1.22 {
546 : overbeek 1.27 $set=&set_utilities::union($fam1, $fam2);
547 :     }
548 :    
549 :     print STDERR "FAMILIES: ", join(" ", @families), "\n";
550 :     foreach my $cid (@$set)
551 :     {
552 :     #my $row=["<a href='proteinfamilies.cgi?prot=$cid'>$cid</a>"];
553 :     my $row=[];
554 : overbeek 1.23 my $seen; my $mismatchcolor;
555 : overbeek 1.27 foreach my $prot (sort $fig->cid_to_prots($cid))
556 : overbeek 1.22 {
557 : overbeek 1.27 for (my $i=0; $i<=$#families; $i++)
558 : overbeek 1.22 {
559 :     # add the protein info to the cell in the table if it this family has that protein. Note that we have seen it, and increment the column counter
560 :     # this if is if the protein that we are looking at is in the family for this column then add it
561 :    
562 :     # if the protein is not added, we want to know if it has the same start characters as the family (i.e. from the same source), and note that.
563 : overbeek 1.27 if ($extids->{$prot}->{$families[$i]})
564 :     {
565 :     $seen->{$prot}=1;
566 :     $row->[$i] .= &protein_link($prot, 1, $families[$i]) . "<br />";
567 :     }
568 :     elsif ($prot =~ /^$source[$i]/)
569 :     {
570 :     $mismatchcolor->{$i}=1;
571 :     $row->[$i] .= &protein_link($prot, 1, $families[$i]) . "<br />";
572 :     }
573 :     }
574 :     }
575 :    
576 :     unless ($#$row == $#families) {$#$row=$#families}
577 : overbeek 1.22
578 : overbeek 1.27 # color those cells that have a mismatch. note that this colors the whole cell even if there is more than one protein mismatching
579 : overbeek 1.22 map {$row->[$_] = [$row->[$_], "td style='background-color: #FF3366'"]} keys %$mismatchcolor;
580 : overbeek 1.27 # change empty cells
581 : overbeek 1.22 map {$row->[$_] = " &nbsp; " unless ($row->[$_])} (0 .. $#$row);
582 :    
583 :     # if we want to show everything do so, otherwise only show the rows where there is a missing protein
584 : overbeek 1.27 if (($cgi->param("diff") eq "1and2") || ($cgi->param("diff") eq "1or2") || ($cgi->param('show') eq "all"))
585 : overbeek 1.22 {
586 :     push @$tab, $row;
587 :     }
588 : overbeek 1.27 elsif ($cgi->param("diff") eq "1not2" && $row->[1] ne " &nbsp; ")
589 : overbeek 1.22 {
590 :     push @$tab, $row;
591 :     }
592 : overbeek 1.27 elsif ($cgi->param("diff") eq "2not1" && $row->[0] ne " &nbsp; ")
593 : overbeek 1.25 {
594 : overbeek 1.26 #($row->[1], $row->[2])=($row->[2], $row->[1]);
595 : overbeek 1.25 push @$tab, $row;
596 :     }
597 : overbeek 1.22 }
598 :    
599 : overbeek 1.27
600 :     #generate the titles
601 :     my $title;
602 : overbeek 1.22 ($cgi->param("diff") eq "1and2") ? $title.=$fig->family_function($fam_id1). " ($fam_id1) AND in " . $fig->family_function($fam_id2). " ($fam_id2)\n" :
603 : overbeek 1.27 ($cgi->param("diff") eq "1or2") ? $title.=$fig->family_function($fam_id1). " ($fam_id1) OR in " . $fig->family_function($fam_id2). " ($fam_id2)\n" :
604 : overbeek 1.22 ($cgi->param("diff") eq "1not2") ? $title.=$fig->family_function($fam_id1). " ($fam_id1) BUT NOT in " . $fig->family_function($fam_id2). " ($fam_id2)\n" :
605 :     ($cgi->param("diff") eq "2not1") ? $title.=$fig->family_function($fam_id2). " ($fam_id2) BUT NOT in " . $fig->family_function($fam_id1). " ($fam_id1)\n" : 1;
606 : overbeek 1.27
607 :     push @$html, (
608 :     "<h3 style='text-align: center'>Comparison of proteins in $title</h3>\n",
609 :     "<p>The table shows those proteins that are in $title. The rows show proteins with the same sequence. All the IDs along the row are different IDs from identical protein sequences. The columns of the table are different protein families. If a protein in an individual cell has a red background, that protein is either not in the family for that column, or is in that family and another family in the same database.</p>\n",
610 :     "<p>To find out which families each protein is in, scroll the mouse over the link to the protein database. The popup window will show you the protein families for each protein. Note, however, if this protein is in the family for that column only that family is currently shown. If the window has a green background the protein is in the same family as the column as a whole. If the window has a red background the protein is in a different family than the column as a whole. The red background and the red cell color are complimentary and reinforce that these are proteins you should look at.</p>\n",
611 :     );
612 :    
613 : overbeek 1.22
614 :     my @headers=@families;
615 : overbeek 1.26 map {$_ = "<a " . FIGjs::mouseover("Column Family", $fig->family_function($_) . " ($_)", '') . " href='proteinfamilies.cgi?family=$_'>$_</a>"} @headers;
616 :     if ($tab)
617 :     {
618 : overbeek 1.27 push @$html, HTML::make_table(\@headers, $tab, "Proteins In $title");
619 :     push @$html, "<p> &nbsp; </p>\n"; # this is filler to shift the page down a little and allow room for the mouseover
620 : overbeek 1.26 }
621 :     else
622 :     {
623 : overbeek 1.27 my $sorry="<p>Sorry there were no protein families that satisfied looking for</p>\n<p>$title</p>";
624 :     if (($cgi->param("diff") eq "1not2") || ($cgi->param("diff") eq "2not1")) {$sorry .= "<p>and had candidate proteins that could be in those families</p>"}
625 :     push @$html, $cgi->h3("<div style='color: blue; text-align: center'>$sorry</span>");
626 : overbeek 1.18 }
627 :     }
628 :    
629 : overbeek 1.27 =head2 protein_link()
630 : overbeek 1.18
631 : overbeek 1.27 This takes a protein ID and returns the link (full link including ID) back to the appropriate database.
632 : overbeek 1.18
633 : overbeek 1.27 If addmouseover is true then a mouseover will be added showing the families the peg is in
634 : overbeek 1.21
635 : overbeek 1.27 If $fam is provided it will be the header of the mouseover popup.
636 : overbeek 1.21
637 : overbeek 1.27 =cut
638 : overbeek 1.22
639 :     sub protein_link {
640 : overbeek 1.27 my ($p, $addmouseover, $fam) =@_;
641 : overbeek 1.22 my %proteinbase=(
642 :     "fig" => "protein.cgi?user=$user&prot=fig|",
643 :     "cog" => "http://www.ncbi.nlm.nih.gov/COG/old/palox.cgi?",
644 :     "sp" => "http://www.expasy.org/uniprot/",
645 :     "tr" => "http://www.expasy.org/uniprot/",
646 :     "kegg" => "http://www.genome.jp/dbget-bin/www_bget?",
647 :     );
648 :    
649 : overbeek 1.26 my $mouseovertitle="Protein Families";
650 :     if ($fam)
651 :     {
652 : overbeek 1.27 $mouseovertitle="<i>Column family: " . $fig->family_function($fam) . " ($fam)</i>";
653 :     }
654 :     my $familiesforp = "<b>Families for $p:</b><br>";
655 :     my ($hcolor, $bgcolor)=('#11AA66','#BBFFBB'); # text background color.
656 :    
657 :     # if the protein is in our family of interest show just that, otherwise show all the families
658 :     my @thisfam=$fig->ext_family_for_id($p);
659 :     if (grep {$_ eq $fam} @thisfam)
660 :     {
661 :     $familiesforp .= $fig->family_function($fam) . " ($fam)";
662 :     }
663 :     elsif (scalar(@thisfam))
664 :     {
665 :     $familiesforp .= "<ul>" . join("", map {"<li> " . $fig->family_function($_) . " ($_)</li>"} @thisfam) . "</ul>";
666 :     if (!$fam) {($hcolor, $bgcolor)=('','')} # use the default colors
667 :     else {($hcolor, $bgcolor)=('#CC0000', '#FF3366')} # we're doing a comparison and the families are different so color them red
668 :     }
669 :     else
670 :     {
671 :     $familiesforp="<b>Families for $p:</b><br>No protein families";
672 :     }
673 :    
674 :     foreach my $key (keys %proteinbase)
675 :     {
676 :     if ($p =~ /^$key/ && $addmouseover)
677 :     {
678 :     $p =~ s/^(.*?)\|//;
679 :     $p = "<a " . FIGjs::mouseover($mouseovertitle, $familiesforp, '', '1', $hcolor, $bgcolor) . " href='$proteinbase{$key}$p'>$1|$p</a>";
680 :     }
681 :     elsif ($p =~ /^$key/)
682 :     {
683 :     $p =~ s/^(.*?)\|//;
684 :     $p = "<a href='$proteinbase{$key}$p'>$1|$p</a>";
685 :     }
686 : overbeek 1.26 }
687 : overbeek 1.22 return $p;
688 :     }
689 :    
690 : overbeek 1.21
691 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3