[Bio] / FigWebServices / proteinfamilies.cgi Repository:
ViewVC logotype

Diff of /FigWebServices/proteinfamilies.cgi

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.16, Mon Oct 3 23:22:48 2005 UTC revision 1.17, Wed Oct 5 15:48:22 2005 UTC
# Line 72  Line 72 
72  elsif ($cgi->param('reverse_analyse_family')) {  elsif ($cgi->param('reverse_analyse_family')) {
73   &analyse_family($fig,$cgi,$html, 1);   &analyse_family($fig,$cgi,$html, 1);
74  }  }
75    elsif ($cgi->param('equivalence'))
76    {
77     &set_of_equivs($fig,$cgi,$html);
78    }
79    elsif ($cgi->param('differentiate'))
80    {
81     &differentiate($fig,$cgi,$html);
82    }
83  elsif ($cgi->param('family'))  elsif ($cgi->param('family'))
84  {  {
85   &show_family($fig,$cgi,$html);   &show_family($fig,$cgi,$html);
# Line 100  Line 108 
108   "<h2>Protein Families</h2>\n",   "<h2>Protein Families</h2>\n",
109   "<p>Please enter a protein ID . You will recieve a list of all the families that protein is in. \n",   "<p>Please enter a protein ID . You will recieve a list of all the families that protein is in. \n",
110   "You can use a FIG ID such as fig|83333.1.peg.3, or an ID from SwissProt, KEGG, NCBI, and others.</p>",   "You can use a FIG ID such as fig|83333.1.peg.3, or an ID from SwissProt, KEGG, NCBI, and others.</p>",
111   $cgi->start_form(),   $cgi->start_form(-method=>'get'),
112   "Please enter a protein id: ", $cgi->textfield(-name=>"prot", -size=>40), "<br>",   "Please enter a protein id: ", $cgi->textfield(-name=>"prot", -size=>40), "<br>",
113     $cgi->submit(-name=>'equivalence', -value=>"Show an equivalence table"),
114   "<p>Alternately, you can enter a family. Please enter a family name in the format pir|PIRSF001547 or fig|PF002363.</p>",   "<p>Alternately, you can enter a family. Please enter a family name in the format pir|PIRSF001547 or fig|PF002363.</p>",
115   "Please enter a family id:  ", $cgi->textfield(-name=>"family", -size=>40), "<br>",   "Please enter a family id:  ", $cgi->textfield(-name=>"family", -size=>40), "<br>",
116   $cgi->submit, $cgi->reset, $cgi->end_form;   $cgi->submit, $cgi->reset, $cgi->end_form;
# Line 130  Line 139 
139    "Each of the sequences with a given ID have the same amino acid sequence, and hence are the same polypeptide, ",    "Each of the sequences with a given ID have the same amino acid sequence, and hence are the same polypeptide, ",
140    "even though they may come from different organisms.</p>",    "even though they may come from different organisms.</p>",
141    "<p>The links will take you to the respective databases for each of the other protein families.\n</p>",    "<p>The links will take you to the respective databases for each of the other protein families.\n</p>",
142    $cgi->start_form,    $cgi->start_form(-method=>'get'),
143    &HTML::make_table($col_hdrs, $tab, "Proteins in " . $fig->family_function($fam) . " ($fam)"),    &HTML::make_table($col_hdrs, $tab, "Proteins in " . $fig->family_function($fam) . " ($fam)"),
144    $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>'family', -value=>"$fam"),    $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>'family', -value=>"$fam"),
145    $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),    $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),
# Line 158  Line 167 
167     push @$tab, ["<a href='$self?family=$fam'>$fam</a>", $fig->family_function($fam), $noprots, $cgi->checkbox(-name=>$fam, -label=>'')];     push @$tab, ["<a href='$self?family=$fam'>$fam</a>", $fig->family_function($fam), $noprots, $cgi->checkbox(-name=>$fam, -label=>'')];
168    }    }
169    
170    my $col_hdrs=['Family ID', 'Family Function', 'Number of IDs in Family', 'Choose Family'];    my $col_hdrs=['Family ID', 'Family Function', 'Number of CIDs in Family', 'Choose Family'];
171    push @$html, "<h2>Families for $peg</h2>\n",    push @$html, "<h2>Families for $peg</h2>\n",
172    $cgi->start_form,    $cgi->start_form(-method=>'get'),
173    "<p>$peg is in the following ", scalar(@families), " families. Please choose one or more families using the checkboxes</p>\n",    "<p>$peg is in the following ", scalar(@families), " families. Please choose one or more families using the checkboxes</p>\n",
174      "A CID is a unique, internal ID we have assigned to proteins with identical sequences",
175    &HTML::make_table($col_hdrs, $tab, "Families for $peg"),  "\n",    &HTML::make_table($col_hdrs, $tab, "Families for $peg"),  "\n",
176    $cgi->submit('Show Proteins In Each Family'),    $cgi->submit('Show Proteins In Each Family'),
177    $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),    $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),
# Line 248  Line 258 
258     push @$tab, \@row;     push @$tab, \@row;
259    }    }
260    
261    push @$html,  $cgi->start_form(), $cgi->p("Limit the display to proteins from ", &choose_focus($cgi), "\n"), $cgi->p("Sort the order by ", &choose_sort($cgi),"\n");    push @$html,  $cgi->start_form(-method=>'get'), $cgi->p("Limit the display to proteins from ", &choose_focus($cgi), "\n"), $cgi->p("Sort the order by ", &choose_sort($cgi),"\n");
262    if ($reverse) {    if ($reverse) {
263     push @$html, $cgi->p("These are proteins that ARE NOT in ", $fig->family_function($col), " ($col) but are in other families that have proteins in this family.");     push @$html, $cgi->p("These are proteins that ARE NOT in ", $fig->family_function($col), " ($col) but are in other families that have proteins in this family.");
264    } else {    } else {
# Line 373  Line 383 
383   push @$html, &HTML::make_table($col_hdrs, $tab, "Families"),  "\n",   push @$html, &HTML::make_table($col_hdrs, $tab, "Families"),  "\n",
384  }  }
385    
386    ## Based on request from Ross:
387    #       Subject:        Re: fig.pl
388    #       Date:   October 4, 2005 6:21:00 AM PDT
389    #       From:     Ross@theFIG.info
390    #       To:       raedwards@gmail.com
391    #
392    #Rob,
393    #
394    #It seems to me that you got that right, and the function is certainly at the
395    #core of what is needed.  I have been thinking about what I would want with
396    #protein families,
397    #and it goes something like this:
398    #
399    #1. Given a protein FIG1, you can get the set of proteins with the same CID
400    #(call it CID1).  Call this set EQUIV, since it is really a set of IDs that are
401    #equivalent.
402    #
403    #2. From the set of IDs in EQUIV, you can get the set of protein families (from
404    #all sources) that contain the IDs in EQUIV.  This gives a table
405    #
406    #
407    #            [CID,ID,Function,Family,FamilyFunction]
408    #
409    #    All of the table entries describe a family containing CID1.
410    #
411    #3.  From this table you select two Families to be compared (e.g., one KEGG
412    #family vs a FIG family).  This ends the first part -- selecting the precise
413    #two
414    #      families to be compared.  Each of the two families  should be thought of
415    #as [CID,ID,Family].
416    #
417    #4.  The comparison of SET1 and SET2 uses essentially the function you
418    #implemented.  You need to form three sets:
419    #
420    #            the intersection of SET1 and SET2
421    #            SET1 - SET2
422    #            SET2 - SET1
423    #
424    #       You may or may not wish to display each of the three sets.  The user
425    #should be able to select which.  When you think
426    #        of one of these sets, it is useful to think of
427    #{CID,Family,Set-of-CIDs}.  That is, it is not just a set of CIDs; it should be
428    #viewed as a
429    #        set of CIDs from a specific family that was chosen because it
430    #contained a specific CID.
431    #
432    #5. When displaying a set of proteins from a given family, you start with
433    #(CID,Family,Set-of-CIDs).  Each line should contain
434    #
435    #            1. A single CID from the Set-of-CIDs  (call this CID2).
436    #
437    #            2. A count of the number of sources that place both CID1 and CID2
438    #in the same family (note that this is not a count of the families that include
439    #both CID1 and CID2)
440    #
441    #            3.  For each source a "Y" or space indicating whether or not the
442    #source placed CID1 and CID2 into the same family (i.e., whether or not there
443    #                  is at least one family from the source that contains both
444    #CID1 and CID2).
445    #
446    #That is what I think should be done.  Can we discuss it?
447    #
448    
449    
450    
# set_of_equivs($fig, $cgi, $html)
#
# For every value of the 'prot' CGI parameter, look up the protein's CID,
# collect all protein IDs sharing that CID (the "EQUIV" set), and append to
# @$html a form containing:
#   * a table with one row per (equivalent protein, family) pair,
#   * two popup menus listing every family seen, so two can be compared,
#   * the focus chooser and three checkboxes selecting which sets to show,
#   * a 'differentiate' submit button.
#
# Parameters:
#   $fig  - object providing prot_to_cid, cid_to_prots, function_of,
#           families_for_protein and family_function
#   $cgi  - CGI query object
#   $html - array reference that the generated HTML fragments are pushed onto
#
# Returns nothing useful; the output is the side effect on @$html.
sub set_of_equivs {
    my ($fig, $cgi, $html) = @_;

    foreach my $peg ($cgi->param('prot')) {
        my $cid   = $fig->prot_to_cid($peg);
        my @equiv = $fig->cid_to_prots($cid);

        my $tab     = [];
        my $allfams = {};   # family id => "id : function" label for the popup menus

        # Plain loops rather than map: the work here is all side effects.
        foreach my $id (@equiv) {
            foreach my $fam ($fig->families_for_protein($id)) {
                push @$tab, [$fig->prot_to_cid($id), $id, scalar($fig->function_of($id)), $fam, $fig->family_function($fam)];
                $allfams->{$fam} = "$fam : " . $fig->family_function($fam);
            }
        }

        # NOTE(review): columns 3 and 4 are Family / Family Function; presumably
        # merge_table_rows collapses repeated cells in those columns - confirm
        # against HTML.pm.
        $tab = &HTML::merge_table_rows($tab, {3=>1, 4=>1});

        my $col_hdrs = ['CID', 'Protein', 'Function of Proteins', 'Family', 'Family Function'];
        my @fam_ids  = sort keys %$allfams;

        # $peg comes straight from the request, so escape it before echoing it
        # back inside markup.
        my $safe_peg = $cgi->escapeHTML($peg);

        push @$html,
            $cgi->start_form(-method=>'get'),
            $cgi->p("For protein <b>$safe_peg</b>, which has the unique ID <b>$cid</b>, this is the EQUIV set."),
            &HTML::make_table($col_hdrs, $tab, ""), "\n",
            $cgi->p("To differentiate families in this table, please choose two families:"),
            $cgi->popup_menu(-name=>"family1", -values=>\@fam_ids, -labels=>$allfams),
            $cgi->popup_menu(-name=>"family2", -values=>\@fam_ids, -labels=>$allfams),
            $cgi->p("Limit the display to proteins from ", &choose_focus($cgi), "\n"),
            $cgi->p("Show proteins:<br /><ul>",
                $cgi->checkbox(-name=>"1not2", -label=>"In family one and NOT family two"), "<br />",
                $cgi->checkbox(-name=>"2not1", -label=>"In family two and NOT family one"), "<br />",
                $cgi->checkbox(-name=>"intersect", -label=>"In both families"), "</ul>"),
            $cgi->submit(-name=>"differentiate", -value=>"Differentiate these families"),
            $cgi->reset,
            $cgi->end_form();
    }
}
479    
# differentiate($fig, $cgi, $html)
#
# Compare the two families named by the 'family1' and 'family2' CGI
# parameters and append up to three ordered lists of protein IDs to @$html,
# depending on which checkboxes were ticked:
#   1not2     - proteins in family1 but not in family2
#   2not1     - proteins in family2 but not in family1
#   intersect - proteins in both families
# Every ID returned by ids_in_family is expanded through cid_to_prots before
# display. If the 'focus' parameter is set to something other than 'all',
# only protein IDs that start with that string are listed.
#
# Parameters:
#   $fig  - object providing ids_in_family, family_function and cid_to_prots
#   $cgi  - CGI query object
#   $html - array reference that the generated HTML fragments are pushed onto
#
# Returns nothing useful. If the two families are the same, or either one is
# missing, a red error heading is pushed onto @$html and nothing else.
sub differentiate {
    my ($fig, $cgi, $html) = @_;

    # '||' rather than 'or': with 'or' the default was evaluated in void
    # context and never assigned. undef means "no filtering".
    my $focus = scalar($cgi->param('focus')) || 'all';
    undef $focus if $focus eq 'all';

    # Force scalar context: in list context a missing family1 returns an empty
    # list, which would shift family2's value into $fam_id1.
    my $fam_id1 = scalar $cgi->param('family1');
    my $fam_id2 = scalar $cgi->param('family2');
    if (!defined $fam_id1 || !defined $fam_id2 || $fam_id1 eq $fam_id2) {
        push @$html, "<h2 style='color: red'>Please choose two different protein families</h2>";
        return;
    }

    my ($fam1, $fam2) = ([$fig->ids_in_family($fam_id1)], [$fig->ids_in_family($fam_id2)]);

    # The family ids are request data; escape them before echoing into headings.
    my ($name1, $name2) = map { $cgi->escapeHTML($_) } ($fam_id1, $fam_id2);
    my $func1 = $fig->family_function($fam_id1);
    my $func2 = $fig->family_function($fam_id2);

    if ($cgi->param("1not2")) {
        push @$html, _differentiate_section($fig, $cgi, $focus,
            "Proteins that are in $name1 (" . $func1 . ") and not in $name2 (" . $func2 . ")\n",
            &set_utilities::set_diff($fam1, $fam2));
    }
    if ($cgi->param("2not1")) {
        push @$html, $cgi->hr, _differentiate_section($fig, $cgi, $focus,
            "Proteins that are in $name2 (" . $func2 . ") and not in $name1 (" . $func1 . ")\n",
            &set_utilities::set_diff($fam2, $fam1));
    }
    if ($cgi->param("intersect")) {
        push @$html, $cgi->hr, _differentiate_section($fig, $cgi, $focus,
            "Proteins that are in both $name1 (" . $func1 . ") and in $name2 (" . $func2 . ") <b>[the intersection]</b>\n",
            &set_utilities::intersection($fam1, $fam2));
    }
    return;
}

# Build one result section: an <h3> heading followed by an ordered list of the
# sorted protein IDs obtained by expanding each entry of @$cids through
# cid_to_prots, optionally restricted to IDs beginning with $focus.
# Returns the HTML fragments as a list.
sub _differentiate_section {
    my ($fig, $cgi, $focus, $heading, $cids) = @_;

    my @prots = map { $fig->cid_to_prots($_) } @$cids;

    # \Q...\E: treat the focus as a literal prefix, so a value such as "fig|"
    # is not read as a regex alternation that matches everything.
    # NOTE(review): assumes choose_focus offers plain prefixes, not patterns - confirm.
    @prots = grep { /^\Q$focus\E/ } @prots if defined $focus;

    return (
        $cgi->h3($heading),
        "<ol>\n<li>",
        join("</li>\n<li>", sort @prots),
        "</li>\n</ol>\n\n",
    );
}
505    
506    
507    

Legend:
Removed from v.1.16  
changed lines
  Added in v.1.17

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3