[Bio] / FigWebServices / proteinfamilies.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/proteinfamilies.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.34 - (view) (download)

1 : redwards 1.1 # -*- perl -*-
2 : olson 1.33 #
3 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 :     #
18 :    
19 : redwards 1.1
20 :     =pod
21 :    
22 :     =head1 proteinfamilies.cgi
23 :    
24 : redwards 1.3 A base web interface for getting information about protein families in and out. Initially we are going to make a 3 (or 4) column table of protein, family and other proteins.
25 : redwards 1.1
26 : redwards 1.7
27 : redwards 1.1 =cut
28 :    
29 :     use strict;
30 :     use FIG;
31 : overbeek 1.26 use FIGjs;
32 : redwards 1.1 use HTML;
33 :     use raelib;
34 :     use CGI;
35 : overbeek 1.11 use CGI::Carp qw(fatalsToBrowser);
36 : redwards 1.1 my $cgi=new CGI;
37 :     use LWP::Simple qw(!head); # see the caveat in perldoc LWP about importing two head methods.
38 :    
39 :     my $fig;
40 :     eval {
41 :     $fig = new FIG;
42 :     };
43 :    
44 :     if ($@ ne "")
45 :     {
46 :     my $err = $@;
47 :    
48 :     my(@html);
49 :    
50 :     push(@html, $cgi->p("Error connecting to SEED database."));
51 :     if ($err =~ /Could not connect to DBI:.*could not connect to server/)
52 :     {
53 :     push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
54 :     }
55 :     else
56 :     {
57 :     push(@html, $cgi->pre($err));
58 :     }
59 :     &HTML::show_page($cgi, \@html, 1);
60 :     exit;
61 :     }
62 :    
63 :     my $html = [];
64 :     my $user = $cgi->param('user');
65 :    
66 :     unshift(@$html, "<TITLE>The SEED - Global Protein Families </TITLE>\n");
67 :    
68 : redwards 1.2
69 : overbeek 1.27 if ($cgi->param('equivalence'))
70 : overbeek 1.17 {
71 :     &set_of_equivs($fig,$cgi,$html);
72 :     }
73 : overbeek 1.34 elsif ($cgi->param('bysource'))
74 :     {
75 :     &show_sources;
76 :     if ($cgi->param('family')) {&show_family($fig,$cgi,$html)}
77 :     }
78 : overbeek 1.17 elsif ($cgi->param('differentiate'))
79 :     {
80 : overbeek 1.32 &set_of_equivs($fig,$cgi,$html) if ($cgi->param('prot'));
81 : overbeek 1.17 &differentiate($fig,$cgi,$html);
82 :     }
83 : redwards 1.2 elsif ($cgi->param('family'))
84 :     {
85 :     &show_family($fig,$cgi,$html);
86 : redwards 1.1 }
87 :     elsif ($cgi->param('prot'))
88 :     {
89 : redwards 1.2 &show_protein($fig,$cgi,$html);
90 : redwards 1.1 }
91 : overbeek 1.14 elsif ($cgi->param('fig2kegg'))
92 :     {
93 :     &comparefig2kegg($fig,$cgi,$html);
94 :     }
95 : redwards 1.1 else
96 :     {
97 :     &show_initial($fig,$cgi,$html);
98 :     }
99 :    
100 : overbeek 1.34
101 :    
102 :    
103 :    
104 : redwards 1.1 &HTML::show_page($cgi,$html,1);
105 :     exit;
106 :    
107 :    
108 :     sub show_initial {
109 :     my ($fig,$cgi,$html)=@_;
110 :     # generate a blank page
111 : overbeek 1.34
112 :     # generate the buttons for each of the submits
113 :     my $fams=&families();
114 :     my $buttons="<table width=100%><tr>".
115 :     join("\n",
116 :     map {"<td><input type='submit' name='bysource' value='".$fams->{$_}."' /></td>"} sort {$fams->{$a} cmp $fams->{$b}} keys %$fams
117 :     )."</tr></table>";
118 :    
119 : redwards 1.2 push @$html,
120 : overbeek 1.34 $cgi->start_form(-method=>'get'), "\n",
121 : redwards 1.2 "<h2>Protein Families</h2>\n",
122 : overbeek 1.34
123 :     $cgi->div({class=>'enterprotein'}, "<p>Please enter a protein ID . You will recieve a list of all the families that protein is in. \n",
124 : overbeek 1.27 "You can use a FIG ID such as fig|83333.1.peg.3, or an ID from SwissProt, KEGG, NCBI, and others.</p>\n",
125 :     "Please enter a protein id: ", $cgi->textfield(-name=>"prot", -size=>40), "<br>\n",
126 : overbeek 1.34 $cgi->submit(-name=>'equivalence', -value=>"Show Protein Families"), "\n", $cgi->reset,),
127 :    
128 :     $cgi->div({class=>'byfamily'}, "<p>You can also select one of the data sources below to choose families for that family:</p>",
129 :     $buttons),
130 :    
131 :    
132 :    
133 :     $cgi->end_form;
134 : redwards 1.1 return $html;
135 :     }
136 :    
137 : overbeek 1.22 # "<p>Alternately, you can enter a family. Please enter a family name in the format pir|PIRSF001547 or fig|PF002363.</p>",
138 :     # "Please enter a family id: ", $cgi->textfield(-name=>"family", -size=>40), "<br>",
139 :     # $cgi->submit, $cgi->reset, $cgi->end_form;
140 : redwards 1.7
141 : redwards 1.2 sub show_family {
142 : redwards 1.1 my ($fig,$cgi,$html)=@_;
143 : overbeek 1.32 my $fam=$cgi->param('family');
144 : overbeek 1.34
145 :     # proteins table
146 :     my $tab1=[];
147 :     my $colhdr1=['#', 'Protein', 'Other proteins with same amino acid sequence'];
148 :    
149 :     # families table
150 :     my $tab2=[];
151 :     my $colhdr2=["Family Function", "Number of proteins<br>in both families", "Compare"];
152 :    
153 : overbeek 1.32 my $count=1;
154 : overbeek 1.34 my $otherfamilies;
155 :    
156 :     # first, generate the list of identical proteins
157 : overbeek 1.32 foreach my $extid (sort {$a cmp $b} $fig->ext_ids_in_family($fam))
158 :     {
159 :     my $cid=$fig->prot_to_cid($extid);
160 : overbeek 1.34 foreach my $newfamily ($fig->in_family($cid)) {$otherfamilies->{$newfamily}++}
161 : overbeek 1.32 my @pegs=map {&protein_link($_)} grep {$_ ne $extid} $fig->cid_to_prots($cid);
162 : overbeek 1.34 push @$tab1, [$count, &protein_link($extid, 1), (join ", ", (@pegs))];
163 : overbeek 1.32 $count++;
164 :     }
165 :    
166 : overbeek 1.34 # next, generate the list of families that these are also in
167 : overbeek 1.32 if ($cgi->param('comparepairs'))
168 :     {
169 :     &comparepairs($fig, $cgi, $html);
170 :     }
171 :     else
172 :     {
173 : overbeek 1.34 foreach my $newfam (keys %$otherfamilies)
174 :     {
175 :     my $func=$fig->family_function($newfam);
176 :     $func or ($func=$newfam);
177 :     my $link="<a href=\'proteinfamilies.cgi?family=$newfam&user=$user\'>$func</a>";
178 :     my $compare="<a href=\"proteinfamilies.cgi?user=$user&family1=$fam&family2=$newfam&diff=1or2&differentiate=Compare+these+families\" ".
179 :     " target=\"window_$$\">Compare</a>";
180 :     push @$tab2, [$link, $otherfamilies->{$newfam}, $compare];
181 :     }
182 : redwards 1.1 }
183 : overbeek 1.34 push @$html, "<h2>$fam Family : ", $fig->family_function($fam), "</h2>\n",
184 :     $cgi->p("The family $fam has the function <em>", $fig->family_function($fam), "</em>, and contains ", $fig->sz_family($fam), " proteins.",
185 :     "Each of these proteins are present in other databases, and we have cross mapped them"),
186 :     $cgi->p("The table below shows you the other families that proteins in $fam are also in, and the number of other proteins.",
187 :     "Click on the family name to see that protein, or on the compare link to compare the two families",),
188 :     &HTML::make_table($colhdr2, $tab2, "Other families that these proteins are in"),
189 :     $cgi->h4("<a href=\"proteinfamilies.cgi?family=$fam&comparepairs=1&user=$user\">Compare this family to other protein families</a>"),
190 :    
191 :     $cgi->hr, $cgi->p("This table shows you the ID of the identical proteins in other databases</p>\n",
192 :     "<p>The links will take you to the respective databases for each of the other protein families.\n</p>",),
193 :     &HTML::make_table($colhdr1, $tab1, "Proteins in " . $fig->family_function($fam) . " ($fam)");
194 : redwards 1.1 }
195 :    
196 : overbeek 1.11 sub show_protein {
197 : redwards 1.1 my ($fig,$cgi,$html)=@_;
198 : overbeek 1.11 foreach my $peg ($cgi->param('prot')) {
199 : overbeek 1.22 my @families;
200 :     if ($peg =~ /^\d+$/)
201 :     {
202 :     # it is a cid
203 :     @families=$fig->in_family($peg);
204 :     $peg = "CID $peg";
205 :     }
206 :     else
207 :     {
208 :     @families=$fig->families_for_protein($peg);
209 :     }
210 :    
211 : overbeek 1.11 unless (@families)
212 :     {
213 :     push @$html, "<h2 style='color: red'>Sorry, $peg is not in any protein families</h2>";
214 :     return;
215 : redwards 1.1 }
216 :    
217 : redwards 1.2 my $tab=[];
218 : overbeek 1.20 my $self=$cgi->url(-relative => 1);
219 : overbeek 1.11 foreach my $fam (@families) {
220 :     my %idcount;
221 :     my $noprots=scalar(map {$idcount{$_}=1} $fig->ids_in_family($fam));
222 :     #push @$tab, ["<a href='$self?family=$fam'>$fam</a>", $fig->family_function($fam), $fig->sz_family($fam), $cgi->checkbox(-name=>$fam, -label=>'')];
223 :     push @$tab, ["<a href='$self?family=$fam'>$fam</a>", $fig->family_function($fam), $noprots, $cgi->checkbox(-name=>$fam, -label=>'')];
224 : redwards 1.2 }
225 : overbeek 1.11
226 : overbeek 1.17 my $col_hdrs=['Family ID', 'Family Function', 'Number of CIDs in Family', 'Choose Family'];
227 : overbeek 1.11 push @$html, "<h2>Families for $peg</h2>\n",
228 : overbeek 1.17 $cgi->start_form(-method=>'get'),
229 : overbeek 1.11 "<p>$peg is in the following ", scalar(@families), " families. Please choose one or more families using the checkboxes</p>\n",
230 : overbeek 1.17 "A CID is a unique, internal ID we have assigned to proteins with identical sequences",
231 : overbeek 1.11 &HTML::make_table($col_hdrs, $tab, "Families for $peg"), "\n",
232 :     $cgi->submit('Show Proteins In Each Family'),
233 :     $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),
234 :     $cgi->submit(-name=>'reverse_analyse_family', -value=>"Show Proteins that are NOT in family"),
235 :     $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>"allfams", -value=>\@families), "\n",
236 :     $cgi->reset, $cgi->end_form;
237 : redwards 1.2 }
238 :     }
239 : redwards 1.10
240 :    
241 : overbeek 1.12
242 :    
243 : redwards 1.10 sub analyse_family {
244 : overbeek 1.12 my ($fig,$cgi,$html, $reverse)=@_;
245 : redwards 1.10 # here are the questions:
246 :     # 1. Given a column in a spreadsheet:
247 :     # 2. Here are the proteins in that column
248 :     # 3. For each protein, here are the families that they are in. How many families are unique and how many families is every protein in?
249 :     # if we start with a column of 10 proteins, and nine of them are all in the same families and one is not, we want to exclude the one and keep the nine.
250 :     # so we recommend that a protein be removed from a family.
251 :     # 4. For each of the families that are good, which proteins are there in some/most of the families that are not in the column that we are looking at
252 :     # 5. For each of the families that are good, which proteins are only in one of those families and not in any others?
253 :    
254 :     # Note that column == family, But start with fig and then allow a replace ID feature like before.
255 :    
256 : overbeek 1.15 my $focus=$cgi->param('focus') or 'all'; # these are the things that we are interested in
257 :     undef $focus if ($focus eq "all");
258 : overbeek 1.12
259 :     my @cols;
260 :     if ($cgi->param("allfams")) {@cols=grep {$cgi->param($_)} $cgi->param("allfams")}
261 :     elsif ($cgi->param("family")) {push @cols, $cgi->param('family')}
262 :     else {die "No families declared!"}
263 :    
264 :     foreach my $col (@cols)
265 : redwards 1.10 {
266 :     # $col is the column in the spreadsheet. This is really a family, but to visualize and code this I am doing it in a FIG-centric way
267 : overbeek 1.12 my $proteins_in_col;
268 :     map {$proteins_in_col->{$_}=1} $fig->ids_in_family($col);
269 : redwards 1.10
270 :     # @proteins are the proteins in that column, although these are cids and not fids at the moment
271 :     my $familycount;
272 : overbeek 1.12 foreach my $prot (keys %$proteins_in_col) {
273 : redwards 1.10 foreach my $fam ($fig->in_family($prot)) {
274 :     $familycount->{$fam}++;
275 :     }
276 :     }
277 :    
278 :     my $count_of;
279 :     my $fams;
280 : overbeek 1.12 foreach my $f (sort {$familycount->{$b} <=> $familycount->{$a}} keys %$familycount)
281 :     {
282 :     if ($reverse) {($fams, $count_of)=&ids_missing_from_fam($fig, $f, $focus, $proteins_in_col, $fams, $count_of)}
283 :     else {($fams, $count_of)=&ids_are_in_fam($fig, $f, $focus, $proteins_in_col, $fams, $count_of)}
284 :     }
285 : redwards 1.10
286 :     # create a list of families that we know about
287 : overbeek 1.27 my @fams=grep {$_ ne $col} sort {scalar(keys %{$fams->{$b}}) <=> scalar(keys %{$fams->{$a}})} keys %$fams;
288 : overbeek 1.11 unshift @fams, $col;
289 :    
290 : overbeek 1.15 my $tab=[[["Number of proteins in family", "th colspan=4"], map {scalar(keys %{$fams->{$_}})} @fams]];
291 : redwards 1.10
292 : overbeek 1.11 my @fids;
293 :     if ($cgi->param('sort') eq "genome")
294 :     {
295 :     @fids=sort {$fig->genome_of($a) <=> $fig->genome_of($b)} keys %$count_of;
296 :     }
297 : overbeek 1.15 elsif ($cgi->param('sort') eq "cid")
298 :     {
299 :     @fids=sort {$fig->prot_to_cid($a) <=> $fig->prot_to_cid($b)} keys %$count_of;
300 :     }
301 : overbeek 1.11 else
302 :     {
303 :     @fids=sort {scalar(keys %{$count_of->{$b}}) <=> scalar(keys %{$count_of->{$a}})} keys %$count_of;
304 :     }
305 :    
306 :     my $rowcount;
307 :     foreach my $fid (@fids)
308 :     {
309 : overbeek 1.15 my @row=(++$rowcount, $fig->prot_to_cid($fid), $fid, scalar(keys %{$count_of->{$fid}}));
310 : overbeek 1.11
311 :     foreach my $fam (@fams) {
312 :     $count_of->{$fid}->{$fam} ? push @row, ["Y", "td style='background-color: lightgrey; text-align: center'"] : push @row, " &nbsp ";
313 : redwards 1.10 }
314 :     push @$tab, \@row;
315 :     }
316 : overbeek 1.12
317 : overbeek 1.34 push @$html, $cgi->start_form(-method=>'get'), $cgi->p("Sort the order by ", &choose_sort($cgi),"\n");
318 : overbeek 1.12 if ($reverse) {
319 :     push @$html, $cgi->p("These are proteins that ARE NOT in ", $fig->family_function($col), " ($col) but are in other families that have proteins in this family.");
320 :     } else {
321 :     push @$html, $cgi->p("These are proteins that ARE in ", $fig->family_function($col), " ($col) and are in other families that have proteins in this family.");
322 :     }
323 :    
324 : redwards 1.16 # merge cells in the table
325 :     my $skip;
326 :     map {$skip->{$_}=1} (0, 2 .. 40); # ignore everything except column 2
327 :     $tab=&HTML::merge_table_rows($tab, $skip);
328 :    
329 : overbeek 1.12 push @$html,
330 :     $cgi->hidden(-name=>"family", -value=>@cols), $cgi->hidden("prot"), $cgi->hidden(-name=>"user"),
331 : overbeek 1.11 $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),
332 :     $cgi->submit(-name=>'reverse_analyse_family', -value=>"Show Proteins that are NOT in family"),
333 : overbeek 1.15 &HTML::make_table(["Count", "Unique ID", "Protein ID", "Number of fams protein is in", @fams], $tab,' &nbsp; ');
334 : redwards 1.10 }
335 :     }
336 :    
337 :    
338 : overbeek 1.12 sub ids_are_in_fam {
339 :     my ($fig, $f, $focus, $proteins_in_col, $fams, $count_of)=@_;
340 :     # It seems that $sz_family is not right
341 :     map {$fams->{$f}->{$_}++; $count_of->{$_}->{$f}++}
342 :     grep {/^$focus/}
343 :     map {$fig->cid_to_prots($_)}
344 :     grep {$proteins_in_col->{$_}}
345 :     ($fig->ids_in_family($f));
346 :     return ($fams, $count_of);
347 :     }
348 : redwards 1.10
349 : overbeek 1.12 sub ids_missing_from_fam {
350 :     my ($fig, $f, $focus, $proteins_in_col, $fams, $count_of)=@_;
351 : redwards 1.10 # It seems that $sz_family is not right
352 : overbeek 1.11 map {$fams->{$f}->{$_}++; $count_of->{$_}->{$f}++}
353 : redwards 1.10 grep {/^$focus/}
354 :     map {$fig->cid_to_prots($_)}
355 : overbeek 1.12 grep {!$proteins_in_col->{$_}}
356 : redwards 1.10 ($fig->ids_in_family($f));
357 : overbeek 1.12 return ($fams, $count_of);
358 :     }
359 :    
360 : overbeek 1.11
361 : redwards 1.10
362 : overbeek 1.34 sub families {
363 : overbeek 1.11 my %choices=(
364 : overbeek 1.34 "aclame" => "Aclame",
365 : overbeek 1.11 "fig" => "FIGfams",
366 :     "tigr" => "TIGRfams",
367 :     "pfam" => "PFAM",
368 :     "sp" => "SwissProt",
369 :     "kegg" => "KEGG",
370 :     "pir" => "PIR SuperFams",
371 :     "mcl" => "MCL",
372 :     "cog" => "COG",
373 :     );
374 : overbeek 1.34 return \%choices;
375 : overbeek 1.15 }
376 :    
377 :     sub choose_sort {
378 :     my ($cgi)=@_;
379 :     my %choices=(
380 :     "none" => "Number of Proteins",
381 :     "genome" => "Genome",
382 :     "cid" => "Unique ID",
383 :     );
384 :    
385 :     my $default = $cgi->param("sort"); unless ($default) {$default="none"}
386 :    
387 :     return $cgi->popup_menu(
388 :     -name => "sort",
389 :     -values => [sort {$choices{$a} cmp $choices{$b}} keys %choices],
390 : overbeek 1.11 -labels => \%choices,
391 :     -default => $default,
392 :     );
393 :     }
394 : redwards 1.10
395 : redwards 1.7
396 : overbeek 1.14 sub comparefig2kegg {
397 :     my ($fig,$cgi,$html)=@_;
398 :    
399 :     my $classification; my %subsystem;
400 :     # read classification from kegg file
401 :     if (open(IN, "$FIG_Config::global/ProteinFamilies/kegg_classificaation.txt")) {
402 :     while (<IN>) {
403 :     chomp;
404 :     my @a=split /\t/;
405 :     my $id=shift(@a);
406 :     $subsystem{"kegg|$id"}=pop(@a);
407 :     push @{$classification->{"kegg|$id"}}, \@a;
408 :     }
409 :     }
410 :    
411 :    
412 :     my $tab=[];
413 :     # find out what families our proteins are in
414 :     map {
415 :     my $prot=$_;
416 :     map {
417 :     my $fam=$_;
418 :     if ($fam =~ /^fig/) {
419 :     my %ss;
420 :     map {$ss{$_->[0]}++} ($fig->subsystems_for_role($fig->family_function($fam)));
421 :     map {my $ss=$_; push @$tab, [$prot, @{$fig->subsystem_classification($ss)}, $ss, $fam, $fig->family_function($fam)]} keys %ss;
422 :     }
423 :     else {
424 :     map {push @$tab, [$prot, @{$_}, $subsystem{$fam}, $fam, $fig->family_function($fam)]} @{$classification->{$fam}}
425 :     }
426 :     } grep {/^fig/ || /^kegg/} $fig->families_for_protein($prot);
427 :     } $cgi->param('proteins');
428 :    
429 :     my $col_hdrs=['Protein', ['Classification', 'th colspan=2'], 'Subsystem', 'Family ID', 'Family Function'];
430 :     push @$html, &HTML::make_table($col_hdrs, $tab, "Families"), "\n",
431 :     }
432 : overbeek 1.17
433 :     ## Based on request from Ross:
434 :     # Subject: Re: fig.pl
435 :     # Date: October 4, 2005 6:21:00 AM PDT
436 :     # From: Ross@theFIG.info
437 :     # To: raedwards@gmail.com
438 :     #
439 :     #Rob,
440 :     #
441 :     #It seems to me that you got that right, and the function is certainly at the
442 :     #core of what is needed. I have been thinking about what I would want with
443 :     #protein families,
444 :     #and it goes something like this:
445 :     #
446 :     #1. Given a protein FIG1, you can get the set of proteins with the same CID
447 :     #(call it CID1). Call this set EQUIV, since it is really a set of IDs that are
448 :     #equivalent.
449 :     #
450 :     #2. From the set of IDs in EQUIV, you can get the set of protein families (from
451 :     #all sources) that contain the IDs in EQUIV. This gives a table
452 :     #
453 :     #
454 :     # [,ID,Function,Family,FamilyFunction]
455 :     #
456 :     # All of the table entries describe a family containing CID1.
457 :     #
458 :     #3. From this table you select two Families to be compared (e.g., one KEGG
459 :     #family vs a FIG family). This ends the first part -- selecting the precise
460 :     #two
461 :     # families to be compared. Each of the two families should be thought of
462 :     #as [CID,ID,Family].
463 :     #
464 :     #4. The comparison of SET1 and SET2 uses essentially the function you
465 :     #implemented. You need to form three sets:
466 :     #
467 :     # the intersection of SET1 and SET2
468 :     # SET1 - SET2
469 :     # SET2 - SET1
470 :     #
471 :     # You may or may not wish to display each of the three sets. The user
472 :     #should be able to select which. When you think
473 :     # of one of these sets, it is useful to think of
474 :     #{CID,Family,Set-of-CIDs}. That is, it is not just a set of CIDs; it should be
475 :     #viewed as a
476 :     # set of CIDs from a specific family that was chosen because it
477 :     #contained a specific CID.
478 :     #
479 :     #5. When displaying a set of proteins from a given family, you start with
480 :     #(CID,Family,Set-of-CIDs). Each line should contain
481 :     #
482 :     # 1. A single CID from the Set-of-CIDs (call this CID2).
483 :     #
484 :     # 2. A count of the number of sources that place both CID1 and CID2
485 :     #in the same family (note that this is not a count of the families that include
486 :     #both CID1 and CID2)
487 :     #
488 :     # 3. For each source a "Y" or space indicating whether or not the
489 :     #source placed CID1 and CID2 into the same family (i.e., whether or not there
490 :     # is at least one family from the source that contains both
491 :     #CID1 and CID2).
492 :     #
493 :     #That is what I think should be done. Can we discuss it?
494 :     #
495 :    
496 :    
497 :    
498 :     sub set_of_equivs {
499 : overbeek 1.22 ($fig, $cgi, $html)=@_;
500 : overbeek 1.27 my $peg=$cgi->param('prot');
501 :     my $tab=[];
502 :     my $allfams;
503 : overbeek 1.19
504 : overbeek 1.27 #begin the html so we can add hidden things
505 :     my $genusspecies=$fig->genus_species($fig->genome_of($peg));
506 :     push @$html, (
507 :     $cgi->start_form(-method=>'get'), "\n",
508 :     $cgi->p("<h1>Protein <b>$peg</b>: $genusspecies</h1>"), "\n",
509 :     $cgi->hidden(-name=>'querycid', -value=>$fig->prot_to_cid($peg)), "\n",
510 :     $cgi->a({class=>"help", onMouseover=>"javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this, 'Help', 'The table below shows all the families that contain a protein with the same sequence as <b>$peg</b>, although each family uses different IDs. The number of different IDs in each protein family and the number of unique protein sequences in each family are also shown. The latter is often less than the former since many databases contain identical proteins with different identifiers (for example the same protein from different <i>$genusspecies</i> genome sequences).', ''); this.tooltip.addHandler(); return false;", href=>"Html/ProteinFamilies.html"}, "Help"),
511 :     );
512 :    
513 :     foreach my $fam ($fig->families_for_protein($peg))
514 :     {
515 :     my $ffunc=$fig->family_function($fam) || " &nbsp; ";
516 :     push @$tab, [
517 :     &protein_link($peg),
518 :     scalar($fig->function_of($peg)),
519 :     "<a href=\'proteinfamilies.cgi?family=$fam&user=$user\'>$fam</a>",
520 :     $ffunc,
521 :     $fig->ext_sz_family($fam),
522 :     $fig->sz_family($fam),
523 :     ];
524 :     $allfams->{$fam}="$fam :" . $fig->family_function($fam);
525 :     }
526 : overbeek 1.19
527 : overbeek 1.17
528 :     $tab=&HTML::merge_table_rows($tab, {3=>1, 4=>1});
529 : overbeek 1.27
530 : overbeek 1.22 my $radbut={
531 :     "1not2"=>"In family one and NOT family two\n",
532 :     "2not1"=>"In family two and NOT family one\n",
533 : overbeek 1.31 "1and2"=>"In family one and family two\n",
534 :     "1or2" =>"In family one or family two\n",
535 : overbeek 1.22 };
536 : overbeek 1.27
537 :     # sort the list of families in this table but put the fig families at the beginning of the list
538 :     my @familylist=sort {$a cmp $b} grep {$_ !~ /^fig/} keys %$allfams;
539 :     unshift @familylist, sort {$a cmp $b} grep {$_ =~ /^fig/} keys %$allfams;
540 :    
541 :     my $col_hdrs=['Protein', 'Function', 'Family', 'Family Function', 'External<br>IDs In<br>Family', 'Unique<br>Proteins<br>In Family'];
542 : overbeek 1.21 push @$html, &HTML::make_table($col_hdrs, $tab, ""), "\n", $cgi->p("To differentiate families in this table, please choose two families:"),
543 : overbeek 1.27 "Family 1: &nbsp; ", $cgi->popup_menu(-name=>"family1", -values=>\@familylist, -labels=>$allfams, -default=>$familylist[0]),
544 :     " <br /> Family 2: &nbsp; ", $cgi->popup_menu(-name=>"family2", -values=>\@familylist, -labels=>$allfams, -default=>$familylist[1]),
545 : overbeek 1.18 $cgi->p("Show proteins:<br /><ul>\n",
546 : overbeek 1.27 $cgi->radio_group(-name=>"diff", -values=>[keys %$radbut], -labels=>$radbut, -rows=>4),
547 : overbeek 1.22 "</ul>\n"),
548 :     $cgi->hidden(-name=>'prot', -value=>$peg),
549 : overbeek 1.27 $cgi->submit(-name=>"differentiate", -value=>"Compare these families"), $cgi->reset, $cgi->end_form();
550 : overbeek 1.17 }
551 :    
552 : overbeek 1.27 # initially I added this option in, with appropriate help text, but then I added the union option, and I think that surplants this, so I removed it!
553 :     # $cgi->a({class=>"help", onMouseover=>"javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this, 'Help', 'The table will show only those proteins from one of the families that contains proteins that could be in the other family but are not. Most likely this will only be a few proteins, but the table above shows that there are a large number of proteins in one family but not another. Many of these are proteins that are missing from the database. Checking this box will show all the proteins from the chosen families, not just those with missing proteins. By default, you should not check this box.', ''); this.tooltip.addHandler(); return false;", href=>"Html/ProteinFamilies.html"}, "Help"),
554 :     # "Show all correspondences, not just those with missing proteins: ", $cgi->checkbox(-name=>"show", -value=>"all", -label=>""),
555 :    
556 : overbeek 1.22
557 : overbeek 1.17 sub differentiate {
558 : overbeek 1.22 ($fig, $cgi, $html)=@_;
559 :    
560 :     #my $focus=$cgi->param('focus') or 'all'; # these are the things that we are interested in
561 :     #undef $focus if ($focus eq "all");
562 :     my $focus=$cgi->param('family2');
563 :     $focus =~ s/\|.*//;
564 :    
565 :     my ($fam_id1, $fam_id2)=($cgi->param('family1'), $cgi->param('family2'));
566 :     if ($fam_id1 eq $fam_id2)
567 :     {
568 :     push @$html, "<h2 style='color: red'>Please choose two different protein families</h2>";
569 :     return;
570 :     }
571 :     my ($fam1, $fam2)=([$fig->ids_in_family($fam_id1)], [$fig->ids_in_family($fam_id2)]);
572 : overbeek 1.27
573 :     # This block of code is the one that really decides what the data is that is analyzed.
574 :     # $fam1 and $fam2 are references to arrays containing all the CIDs in each family. We combine them with intersection, union, or differences
575 :     # and then process the remaining IDs.
576 :    
577 :     my $set=[];
578 :     if ($cgi->param("diff") eq "1and2")
579 :     {
580 :     $set=&set_utilities::intersection($fam1, $fam2);
581 :     }
582 :     elsif ($cgi->param("diff") eq "1not2")
583 :     {
584 :     $set=&set_utilities::set_diff($fam1, $fam2);
585 :     }
586 :     elsif ($cgi->param("diff") eq "2not1")
587 :     {
588 :     $set=&set_utilities::set_diff($fam2, $fam1);
589 :     }
590 :     elsif ($cgi->param("diff") eq "1or2")
591 : overbeek 1.22 {
592 : overbeek 1.27 $set=&set_utilities::union($fam1, $fam2);
593 :     }
594 :    
595 : overbeek 1.32 #### which families
596 :     #
597 :     # We need to figure out which other families to compare to (i.e. what families will be in the columns on the table).
598 :     # There are two ways to do this. If we come from the perspective of a single protein, we just want to look at the families
599 :     # that protein maps to. This is the regular view.
600 :     #
601 :     # Alternatively, if we come from the perspective of the whole family, we will just work out all the families
602 :     # that are present in @$set
603 :     #
604 :     # Note that in both cases we grep out our two query families and then unshift them to make them the first two query columns
605 :    
606 :     my @families;
607 :     if ($cgi->param('prot'))
608 :     {
609 :     my $peg=$cgi->param('prot');
610 :     @families=sort {$a cmp $b} grep {$_ ne $fam_id1} grep {$_ ne $fam_id2} $fig->families_for_protein($peg);
611 :     }
612 :     else
613 :     {
614 :     my %allfams;
615 :     foreach my $cid (@$set)
616 :     {
617 :     foreach my $infamily ($fig->in_family($cid))
618 :     {
619 :     $allfams{$infamily}++;
620 :     }
621 :     }
622 :     @families=sort {$allfams{$b} <=> $allfams{$a}} grep {$_ ne $fam_id1} grep {$_ ne $fam_id2} keys %allfams;
623 :     }
624 :    
625 :     unshift @families, ($fam_id1, $fam_id2);
626 :     my @source=@families;
627 :     map {/^(.*?)\|/; $_=$1} @source;
628 :    
629 :     # now figure out all the external IDs in those families
630 :     my $extids;
631 :     foreach my $fam (@families)
632 :     {
633 :     map {$extids->{$_}->{$fam}=1} $fig->ext_ids_in_family($fam);
634 :     }
635 : overbeek 1.31
636 : overbeek 1.32 # finally generate the table. Note that there are three different arrays that we operate on depending on the user input
637 :     # but it really only changes which set algorith we use. Each array is handled identically.
638 :     my $tab=[];
639 :    
640 :     my ($totalfor, $totalagainst);
641 : overbeek 1.27 foreach my $cid (@$set)
642 :     {
643 :     #my $row=["<a href='proteinfamilies.cgi?prot=$cid'>$cid</a>"];
644 :     my $row=[];
645 : overbeek 1.32 my $seen; my $mismatchcolor; my $nofamcolor;
646 : overbeek 1.31 my ($for, $against)=(0,0);
647 : overbeek 1.27 foreach my $prot (sort $fig->cid_to_prots($cid))
648 : overbeek 1.22 {
649 : overbeek 1.27 for (my $i=0; $i<=$#families; $i++)
650 : overbeek 1.22 {
651 :     # add the protein info to the cell in the table if it this family has that protein. Note that we have seen it, and increment the column counter
652 :     # this if is if the protein that we are looking at is in the family for this column then add it
653 :    
654 :     # if the protein is not added, we want to know if it has the same start characters as the family (i.e. from the same source), and note that.
655 : overbeek 1.27 if ($extids->{$prot}->{$families[$i]})
656 :     {
657 :     $seen->{$prot}=1;
658 :     $row->[$i] .= &protein_link($prot, 1, $families[$i]) . "<br />";
659 : overbeek 1.31 $for++;
660 : overbeek 1.27 }
661 :     elsif ($prot =~ /^$source[$i]/)
662 :     {
663 : overbeek 1.32 if ($fig->ext_family_for_id($prot)) {$mismatchcolor->{$i}=1} else {$nofamcolor->{$i}=1}
664 : overbeek 1.27 $row->[$i] .= &protein_link($prot, 1, $families[$i]) . "<br />";
665 : overbeek 1.31 $against--; # note that against is a negative score!
666 : overbeek 1.27 }
667 :     }
668 :     }
669 :    
670 :     unless ($#$row == $#families) {$#$row=$#families}
671 : overbeek 1.22
672 : overbeek 1.27 # color those cells that have a mismatch. note that this colors the whole cell even if there is more than one protein mismatching
673 : overbeek 1.32 map {$row->[$_] = [$row->[$_], "td style='background-color: #CCCCFF'"]} keys %$nofamcolor;
674 : overbeek 1.22 map {$row->[$_] = [$row->[$_], "td style='background-color: #FF3366'"]} keys %$mismatchcolor;
675 : overbeek 1.27 # change empty cells
676 : overbeek 1.22 map {$row->[$_] = " &nbsp; " unless ($row->[$_])} (0 .. $#$row);
677 :    
678 : overbeek 1.31 # add the score
679 : overbeek 1.32 splice(@$row, 0, 0, "$for/$against");
680 : overbeek 1.31
681 : overbeek 1.22 # if we want to show everything do so, otherwise only show the rows where there is a missing protein
682 : overbeek 1.27 if (($cgi->param("diff") eq "1and2") || ($cgi->param("diff") eq "1or2") || ($cgi->param('show') eq "all"))
683 : overbeek 1.22 {
684 :     push @$tab, $row;
685 : overbeek 1.32 $totalfor+=$for; $totalagainst+=$against;
686 : overbeek 1.22 }
687 : overbeek 1.31 elsif ($cgi->param("diff") eq "1not2" && $row->[2] ne " &nbsp; ")
688 : overbeek 1.22 {
689 :     push @$tab, $row;
690 : overbeek 1.32 $totalfor+=$for; $totalagainst+=$against;
691 : overbeek 1.22 }
692 : overbeek 1.32 elsif ($cgi->param("diff") eq "2not1" && $row->[1] ne " &nbsp; ")
693 : overbeek 1.25 {
694 : overbeek 1.26 #($row->[1], $row->[2])=($row->[2], $row->[1]);
695 : overbeek 1.25 push @$tab, $row;
696 : overbeek 1.32 $totalfor+=$for; $totalagainst+=$against;
697 : overbeek 1.25 }
698 : overbeek 1.22 }
699 : overbeek 1.32
700 :     # sort the table
701 :     @$tab=sort {$a->[1] cmp $b->[1] || $a->[2] cmp $b->[2]} @$tab;
702 : overbeek 1.27
703 :     #generate the titles
704 :     my $title;
705 : overbeek 1.22 ($cgi->param("diff") eq "1and2") ? $title.=$fig->family_function($fam_id1). " ($fam_id1) AND in " . $fig->family_function($fam_id2). " ($fam_id2)\n" :
706 : overbeek 1.27 ($cgi->param("diff") eq "1or2") ? $title.=$fig->family_function($fam_id1). " ($fam_id1) OR in " . $fig->family_function($fam_id2). " ($fam_id2)\n" :
707 : overbeek 1.22 ($cgi->param("diff") eq "1not2") ? $title.=$fig->family_function($fam_id1). " ($fam_id1) BUT NOT in " . $fig->family_function($fam_id2). " ($fam_id2)\n" :
708 :     ($cgi->param("diff") eq "2not1") ? $title.=$fig->family_function($fam_id2). " ($fam_id2) BUT NOT in " . $fig->family_function($fam_id1). " ($fam_id1)\n" : 1;
709 : overbeek 1.27
710 :     push @$html, (
711 :     "<h3 style='text-align: center'>Comparison of proteins in $title</h3>\n",
712 :     "<p>The table shows those proteins that are in $title. The rows show proteins with the same sequence. All the IDs along the row are different IDs from identical protein sequences. The columns of the table are different protein families. If a protein in an individual cell has a red background, that protein is either not in the family for that column, or is in that family and another family in the same database.</p>\n",
713 : overbeek 1.32 "<p>To find out which families each protein is in, scroll the mouse over the link to the protein database. The popup window will show you the protein families for each protein. Note, however, if this protein is in the family for that column only that family is currently shown. If the pop-up window has a green background the protein is in the same family as the column as a whole. If the pop-up window has a red background the protein is in a different family than the column as a whole. If the pop-up window has a blue background the protein is not in any protein families. The red background and the red cell color and the blue background and blue cell color are complimentary and reinforce that these are proteins you should look at.</p>\n",
714 : overbeek 1.27 );
715 :    
716 : overbeek 1.22
717 :     my @headers=@families;
718 : overbeek 1.26 map {$_ = "<a " . FIGjs::mouseover("Column Family", $fig->family_function($_) . " ($_)", '') . " href='proteinfamilies.cgi?family=$_'>$_</a>"} @headers;
719 : overbeek 1.32 splice(@headers, 0, 0, "Score");
720 :     if ($tab->[0])
721 : overbeek 1.26 {
722 : overbeek 1.27 push @$html, HTML::make_table(\@headers, $tab, "Proteins In $title");
723 : overbeek 1.32 push @$html, "<p><b>Total Score: </b><br>\nSupporting this comparison between families: <b>$totalfor</b><br>\n",
724 :     "Not supporting this comparison between families: <b>$totalagainst</b></p>\n";
725 :    
726 :    
727 : overbeek 1.26 }
728 :     else
729 :     {
730 : overbeek 1.27 my $sorry="<p>Sorry there were no protein families that satisfied looking for</p>\n<p>$title</p>";
731 :     if (($cgi->param("diff") eq "1not2") || ($cgi->param("diff") eq "2not1")) {$sorry .= "<p>and had candidate proteins that could be in those families</p>"}
732 :     push @$html, $cgi->h3("<div style='color: blue; text-align: center'>$sorry</span>");
733 : overbeek 1.18 }
734 :     }
735 :    
736 : overbeek 1.27 =head2 protein_link()
737 : overbeek 1.18
738 : overbeek 1.27 This takes a protein ID and returns the link (full link including ID) back to the appropriate database.
739 : overbeek 1.18
740 : overbeek 1.27 If addmouseover is true then a mouseover will be added showing the families the peg is in
741 : overbeek 1.21
742 : overbeek 1.27 If $fam is provided it will be the header of the mouseover popup.
743 : overbeek 1.21
744 : overbeek 1.27 =cut
745 : overbeek 1.22
746 :     sub protein_link {
747 : overbeek 1.27 my ($p, $addmouseover, $fam) =@_;
748 : overbeek 1.22 my %proteinbase=(
749 : overbeek 1.30 # put here the link to the site for the proteins.
750 :     # Note the use of single quotes. Later, anywhere where the protein ID should be I'll replace \$p with the protein id.
751 :     # This is because for some reason cog needs the protein ID twice.
752 :     "fig" => "protein.cgi?user=$user&prot=fig|" . '$p',
753 :     "cog" => "http://www.ncbi.nlm.nih.gov/COG/old/blux.cgi?cog=" . '$p&$p',
754 :     "sp" => "http://www.expasy.org/uniprot/" . '$p',
755 :     "tr" => "http://www.expasy.org/uniprot/" . '$p',
756 :     "kegg" => "http://www.genome.jp/dbget-bin/www_bget?" .'$p',
757 : overbeek 1.22 );
758 : overbeek 1.30
759 :     # this was the old link that I think is wrong
760 :     # "cog" => "http://www.ncbi.nlm.nih.gov/COG/old/palox.cgi?",
761 :    
762 : overbeek 1.26 my $mouseovertitle="Protein Families";
763 :     if ($fam)
764 :     {
765 : overbeek 1.27 $mouseovertitle="<i>Column family: " . $fig->family_function($fam) . " ($fam)</i>";
766 :     }
767 :     my $familiesforp = "<b>Families for $p:</b><br>";
768 :     my ($hcolor, $bgcolor)=('#11AA66','#BBFFBB'); # text background color.
769 :    
770 :     # if the protein is in our family of interest show just that, otherwise show all the families
771 :     my @thisfam=$fig->ext_family_for_id($p);
772 :     if (grep {$_ eq $fam} @thisfam)
773 :     {
774 :     $familiesforp .= $fig->family_function($fam) . " ($fam)";
775 :     }
776 :     elsif (scalar(@thisfam))
777 :     {
778 :     $familiesforp .= "<ul>" . join("", map {"<li> " . $fig->family_function($_) . " ($_)</li>"} @thisfam) . "</ul>";
779 :     if (!$fam) {($hcolor, $bgcolor)=('','')} # use the default colors
780 :     else {($hcolor, $bgcolor)=('#CC0000', '#FF3366')} # we're doing a comparison and the families are different so color them red
781 :     }
782 :     else
783 :     {
784 :     $familiesforp="<b>Families for $p:</b><br>No protein families";
785 : overbeek 1.29 ($hcolor, $bgcolor)=('','');
786 : overbeek 1.27 }
787 :    
788 :     foreach my $key (keys %proteinbase)
789 :     {
790 :     if ($p =~ /^$key/ && $addmouseover)
791 :     {
792 :     $p =~ s/^(.*?)\|//;
793 : overbeek 1.30 my $dbase=$1;
794 :     my $location=$proteinbase{$key};
795 :     $location =~ s/\$p/$p/g; # this is for stupid cog that requires two invocations of the protein ID
796 : overbeek 1.32 $p = "<a " . FIGjs::mouseover($mouseovertitle, $familiesforp, '', '1', $hcolor, $bgcolor) . " href='$location' target='window_$$'>$dbase|$p</a>";
797 : overbeek 1.27 }
798 :     elsif ($p =~ /^$key/)
799 :     {
800 :     $p =~ s/^(.*?)\|//;
801 : overbeek 1.30 my $dbase=$1;
802 :     my $location=$proteinbase{$key};
803 :     $location =~ s/\$p/$p/g; # this is for stupid cog that requires two invocations of the protein ID
804 : overbeek 1.32 $p = "<a href='$location' target='window_$$'>$dbase|$p</a>";
805 : overbeek 1.27 }
806 : overbeek 1.26 }
807 : overbeek 1.22 return $p;
808 :     }
809 :    
810 : overbeek 1.21
811 : overbeek 1.32 sub comparepairs{
812 :     ($fig, $cgi, $html)=@_;
813 :     # very very experimental. Don't look at this code or use it
814 :     # we are going to take a protein family and then look at all the ids in that family and count the occurences of other families
815 :     # then find the proteins and give them a score
816 :    
817 :     my $fam=$cgi->param("family");
818 :     return unless ($fam);
819 : overbeek 1.21
820 : overbeek 1.32 my $count;
821 :     foreach my $cid ($fig->ids_in_family($fam))
822 :     {
823 :     foreach my $newfamily ($fig->in_family($cid))
824 :     {
825 :     $count->{$newfamily}++;
826 :     }
827 :     }
828 :    
829 :     my $tab;
830 :     foreach my $test (keys %$count)
831 :     {
832 :     next if ($test eq $fam);
833 :     my $res=&score_two_families($fam, $test, "or");
834 :     my $link1="<a href=\"proteinfamilies.cgi?user=$user&family1=$fam&family2=$test&diff=2not1&differentiate=Compare+these+families\" target=\"window_$$\">$fam</a>";
835 :     my $link2="<a href=\"proteinfamilies.cgi?user=$user&family=$test\" target=\"window_$$\">$test</a>";
836 :     push @$tab, [$link1, $link2, $count->{$test}, @$res];
837 :     }
838 :    
839 :     @$tab=sort {$b->[2] <=> $a->[2] || $b->[3] <=> $a->[3] || $a->[4] <=> $b->[4]} @$tab;
840 :    
841 :    
842 :     push @$html,
843 :     $cgi->h3("Comparisons between protein families"),
844 :     $cgi->p("The table below shows a comparison of <b>", $fig->family_function($fam), " ($fam)</b> with all the other protein families ",
845 :     " that proteins in $fam are also present in. The link in the first column will take you to a table showing proteins that are in family two ",
846 :     " that are not in $fam. These are proteins that you should investigate for being in your family. ",
847 :     " The link in the second column will take you to that families page. The <b>frequency</b> is the number of proteins in $fam that are also in the second family.",
848 :     " The <b>For</b> score is the total number of proteins that agree both families are similar. The <b>Against</b> score is the total number of proteins ",
849 :     " that disagree that the two proteins are similar. These numbers are the sum of the for and against scores shown under the side-by-side comparison ",
850 :     " when you click the link in column 1."),
851 :     HTML::make_table(["Family 1", "Family 2", "Frequency", "For", "Against"], $tab, "Scores");
852 :     }
853 :    
854 :     sub score_two_families {
855 :     my ($fam_id1, $fam_id2, $method)=@_;
856 :     my $focus=$fam_id1;
857 :     $focus =~ s/\|.*//;
858 :    
859 :     return unless ($fam_id1 && $fam_id2);
860 :     my ($fam1, $fam2)=([$fig->ids_in_family($fam_id1)], [$fig->ids_in_family($fam_id2)]);
861 :    
862 :     my $set=[];
863 :     if ($method eq "and") {$set=&set_utilities::intersection($fam1, $fam2)}
864 :     elsif ($method eq "not") {$set=&set_utilities::set_diff($fam1, $fam2)}
865 :     elsif ($method eq "or") {$set=&set_utilities::union($fam1, $fam2)}
866 :    
867 :     # now figure out all the families represented by @$set
868 :     my %allfams;
869 :     foreach my $cid (@$set)
870 :     {
871 :     foreach my $infamily ($fig->in_family($cid))
872 :     {
873 :     $allfams{$infamily}++;
874 :     }
875 :     }
876 :    
877 :     my @families=sort {$allfams{$b} <=> $allfams{$a}} grep {$_ ne $fam_id1} grep {$_ ne $fam_id2} keys %allfams;
878 :     unshift @families, ($fam_id1, $fam_id2);
879 :     my @source=@families;
880 :     map {/^(.*?)\|/; $_=$1} @source;
881 :    
882 :     # now figure out all the external IDs in those families
883 :     my $extids;
884 :     foreach my $fam (@families) {map {$extids->{$_}->{$fam}=1} $fig->ext_ids_in_family($fam)}
885 :    
886 :     my ($totalfor, $totalagainst);
887 :     foreach my $cid (@$set)
888 :     {
889 :     my ($for, $against)=(0,0);
890 :     foreach my $prot (sort $fig->cid_to_prots($cid))
891 :     {
892 :     for (my $i=0; $i<=$#families; $i++)
893 :     {
894 :     if ($extids->{$prot}->{$families[$i]})
895 :     {
896 :     $for++;
897 :     }
898 :     elsif ($prot =~ /^$source[$i]/)
899 :     {
900 :     $against--; # note that against is a negative score!
901 :     }
902 :     }
903 :     }
904 :    
905 :     $totalfor+=$for; $totalagainst+=$against;
906 :     }
907 :     return [$totalfor, $totalagainst];
908 :     }
909 :    
910 : overbeek 1.34
911 :     sub show_sources {
912 :     my $source=$cgi->param('bysource');
913 :     # this is the display name and so we need to reverse map it to the source from $fams
914 :     my $fams=&families();
915 :     my $extrahtml;
916 :     map {$source=$_ if ($source eq $fams->{$_})} keys %$fams;
917 :     # now source will be something like fig, kegg, mcl, etc
918 :     # get all families
919 :     my @families = $fig->families_by_source($source);
920 :     my $label;
921 :     map {$label->{$_}=substr($fig->family_function($_), 0, 100)} @families;
922 :     @families=sort {$label->{$a} cmp $label->{$b}} @families;
923 :     # limit the families to some text
924 :     if ($cgi->param('limitfamiliesto'))
925 :     {
926 :     my $text=$cgi->param('limitfamiliesto');
927 :     @families=grep{$label->{$_}=~/$text/i} @families;
928 :     }
929 :    
930 :     if ($source eq "pir")
931 :     {
932 :     my $full='full';
933 :     if ($cgi->param('pirsflimit')) {$full=$cgi->param('pirsflimit')} else {$cgi->param('pirsflimit', $full)}
934 :     if ($full ne "No Limit")
935 :     {
936 :     print STDERR "Looking for $full\n";
937 :     @families=grep{$label->{$_}=~/^\($full/i} @families;
938 :     }
939 :     my $pirsflimits={full=>"Full", prelim=>"Preliminary", 'NoLimit'=>'No Limits', none=>"None of these"};
940 :     $extrahtml.=$cgi->p("Limit to PIR SF: ", $cgi->popup_menu(-name=>"pirsflimit", -values=>[keys %$pirsflimits], -labels=>$pirsflimits));
941 :     }
942 :    
943 :     my $sl=$cgi->scrolling_list(-name=>'family', -values=>\@families, -labels=>$label, -size=>10, -multiple=>0);
944 :     push @$html,
945 :     $cgi->start_form(-method=>'get'), "\n",
946 :     "<h2>Protein Families</h2>\n",
947 :    
948 :     $cgi->div({class=>'enterprotein'}, "<h2>These are the ", scalar(@families), " protein families for ", $fams->{$source}, "</h2>\n",
949 :     "Please choose a protein family from the list and then you will be shown the proteins in that family",
950 :     $sl,
951 :     $cgi->p("You can limit this table to some text: ", $cgi->textfield(-name=>"limitfamiliesto", -value=>"", -size=>20)),
952 :     $extrahtml,
953 :     $cgi->hidden(-name=>'bysource', -value=>$fams->{$source}),
954 :     $cgi->p($cgi->submit(-name=>'bysource', -value=>"Rebuild table"), $cgi->submit('submit', 'Show Family'), $cgi->reset()),
955 :    
956 :    
957 :     ),
958 :    
959 :     $cgi->end_form;
960 :     return $html;
961 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3