[Bio] / FigWebServices / proteinfamilies.cgi Repository:
ViewVC logotype

Diff of /FigWebServices/proteinfamilies.cgi

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.26, Tue Oct 25 17:41:32 2005 UTC revision 1.27, Wed Nov 23 20:34:59 2005 UTC
# Line 50  Line 50 
50  unshift(@$html, "<TITLE>The SEED - Global Protein Families </TITLE>\n");  unshift(@$html, "<TITLE>The SEED - Global Protein Families </TITLE>\n");
51    
52    
53  if ($cgi->param('Show Proteins In Each Family'))  if ($cgi->param('equivalence'))
 {  
  my @needed=grep {$cgi->param($_)} $cgi->param("allfams");  
  $cgi->param(-name=>'family', -value=>\@needed);  
  &show_family($fig,$cgi,$html);  
 }  
 elsif ($cgi->param('analyse_family')) {  
  # finding what is there is the same as findingh what is missing you just need one extra !  
  # these two things call the same method.  
  &analyse_family($fig,$cgi,$html, 0);  
 }  
 elsif ($cgi->param('reverse_analyse_family')) {  
  &analyse_family($fig,$cgi,$html, 1);  
 }  
 elsif ($cgi->param('equivalence'))  
54  {  {
55   &set_of_equivs($fig,$cgi,$html);   &set_of_equivs($fig,$cgi,$html);
56  }  }
57  elsif ($cgi->param('differentiate'))  elsif ($cgi->param('differentiate'))
58  {  {
  push @$html, $cgi->h3({style=>"color: red"}, "Scroll down to see the table");  
59   &set_of_equivs($fig,$cgi,$html);   &set_of_equivs($fig,$cgi,$html);
60   &differentiate($fig,$cgi,$html);   &differentiate($fig,$cgi,$html);
61  }  }
# Line 101  Line 86 
86   push @$html,   push @$html,
87   "<h2>Protein Families</h2>\n",   "<h2>Protein Families</h2>\n",
88   "<p>Please enter a protein ID . You will recieve a list of all the families that protein is in. \n",   "<p>Please enter a protein ID . You will recieve a list of all the families that protein is in. \n",
89   "You can use a FIG ID such as fig|83333.1.peg.3, or an ID from SwissProt, KEGG, NCBI, and others.</p>",   "You can use a FIG ID such as fig|83333.1.peg.3, or an ID from SwissProt, KEGG, NCBI, and others.</p>\n",
90   $cgi->start_form(-method=>'get'),   $cgi->start_form(-method=>'get'), "\n",
91   "Please enter a protein id: ", $cgi->textfield(-name=>"prot", -size=>40), "<br>",   "Please enter a protein id: ", $cgi->textfield(-name=>"prot", -size=>40), "<br>\n",
92   $cgi->submit(-name=>'equivalence', -value=>"Show an equivalence table"), $cgi->reset, $cgi->end_form;   $cgi->submit(-name=>'equivalence', -value=>"Show Protein Families"), "\n", $cgi->reset, $cgi->end_form;
93   return $html;   return $html;
94  }  }
95    
# Line 115  Line 100 
100  sub show_family {  sub show_family {
101   my ($fig,$cgi,$html)=@_;   my ($fig,$cgi,$html)=@_;
102   foreach my $fam ($cgi->param('family')) {   foreach my $fam ($cgi->param('family')) {
   my @cids=sort {$a <=> $b} $fig->ids_in_family($fam);  
103    my $tab=[];    my $tab=[];
104    my $col_hdrs=['CID', 'Polypeptides with same amino acid sequence'];    my $col_hdrs=['#', 'Protein', 'Other proteins with same amino acid sequence'];
105    foreach my $cid (@cids) {    my $count=1;
106     my @pegs=$fig->cid_to_prots($cid);  
107     @pegs=map {&protein_link($_)} @pegs;    foreach my $extid (sort {$a cmp $b} $fig->ext_ids_in_family($fam))
108     push @$tab, [$cid, (join ", ", (@pegs))];    {
109       my $cid=$fig->prot_to_cid($extid);
110       my @pegs=map {&protein_link($_)} grep {$_ ne $extid} $fig->cid_to_prots($cid);
111       push @$tab, [$count, &protein_link($extid, 1), (join ", ", (@pegs))];
112       $count++;
113    }    }
114    
115    push @$html, "<h2>$fam Family</h2>\n",    push @$html, "<h2>$fam Family</h2>\n",
116    "<p>The family $fam has the function ", $fig->family_function($fam), ", and contains ", $fig->sz_family($fam), " proteins, as shown in the table below.<br>",    "<p>The family $fam has the function ", $fig->family_function($fam), ", and contains ", $fig->sz_family($fam), " proteins, as shown in the table below.<br>",
117    "Each of the sequences with a given ID have the same amino acid sequence, and hence are the same polypeptide, ",    "Each of these proteins are present in other databases, and this table shows you the ID of the identical proteins in those databases</p>\n",
   "even though they may come from different organisms.</p>",  
118    "<p>The links will take you to the respective databases for each of the other protein families.\n</p>",    "<p>The links will take you to the respective databases for each of the other protein families.\n</p>",
   $cgi->start_form(-method=>'get'),  
119    &HTML::make_table($col_hdrs, $tab, "Proteins in " . $fig->family_function($fam) . " ($fam)"),    &HTML::make_table($col_hdrs, $tab, "Proteins in " . $fig->family_function($fam) . " ($fam)"),
   $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>'family', -value=>"$fam"),  
   $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),  
   $cgi->submit(-name=>'reverse_analyse_family', -value=>"Show Proteins that are NOT in family"),  
   $cgi->end_form;  
120   }   }
121  }  }
122    
# Line 229  Line 211 
211    }    }
212    
213    # create a list of families that we know about    # create a list of families that we know about
214    my @fams=grep {!/$col/} sort {scalar(keys %{$fams->{$b}}) <=> scalar(keys %{$fams->{$a}})} keys %$fams;    my @fams=grep {$_ ne $col} sort {scalar(keys %{$fams->{$b}}) <=> scalar(keys %{$fams->{$a}})} keys %$fams;
215    unshift @fams, $col;    unshift @fams, $col;
216    
217    my $tab=[[["Number of proteins in family", "th colspan=4"], map {scalar(keys %{$fams->{$_}})} @fams]];    my $tab=[[["Number of proteins in family", "th colspan=4"], map {scalar(keys %{$fams->{$_}})} @fams]];
# Line 451  Line 433 
433    
434  sub set_of_equivs {  sub set_of_equivs {
435   ($fig, $cgi, $html)=@_;   ($fig, $cgi, $html)=@_;
436   foreach my $peg ($cgi->param('prot')) {   my $peg=$cgi->param('prot');
   my $cid=$fig->prot_to_cid($peg);  
   my @equiv=$fig->cid_to_prots($cid);  
437    my $tab=[];    my $tab=[];
438    my $allfams;    my $allfams;
439    
440    #begin the html so we can add hidden things    #begin the html so we can add hidden things
441    push @$html, $cgi->start_form(-method=>'get'), $cgi->p("For protein <b>$peg</b>, which has the unique ID <b>$cid</b>, this is the EQUIV set.");   my $genusspecies=$fig->genus_species($fig->genome_of($peg));
442    push @$html, $cgi->hidden(-name=>'querycid', -value=>$fig->prot_to_cid($peg));   push @$html, (
443            $cgi->start_form(-method=>'get'),  "\n",
444            $cgi->p("<h1>Protein <b>$peg</b>: $genusspecies</h1>"), "\n",
445            $cgi->hidden(-name=>'querycid', -value=>$fig->prot_to_cid($peg)), "\n",
446            $cgi->a({class=>"help", onMouseover=>"javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this, 'Help', 'The table below shows all the families that contain a protein with the same sequence as <b>$peg</b>, although each family uses different IDs. The number of different IDs in each protein family and the number of unique protein sequences in each family are also shown. The latter is often less than the former since many databases contain identical proteins with different identifiers (for example the same protein from different <i>$genusspecies</i> genome sequences).', ''); this.tooltip.addHandler(); return false;", href=>"Html/ProteinFamilies.html"}, "Help"),
447            );
448    
449    # this block will make a table with all IDs of all proteins in CID.     foreach my $fam ($fig->families_for_protein($peg))
   # Ross doesn't want that, so we don't use it  
   if (0)  
   {  
    map  
    {  
     my $id=$_;  
     map  
     {  
      push @$tab, [$fig->prot_to_cid($id), $id, &functionate($id), $_, $fig->family_function($_)];  
      $allfams->{$_}="$_ : " . $fig->family_function($_);  
     } $fig->families_for_protein($id);  
    } @equiv;  
   }  
   else  
   {  
    # instead we just show our query protein  
    map  
450     {     {
451      my $ffunc=$fig->family_function($_) || " &nbsp; ";          my $ffunc=$fig->family_function($fam) || " &nbsp; ";
452      push @$tab, [$fig->prot_to_cid($peg), $peg, &functionate($peg), $_, $ffunc, $fig->ext_sz_family($_)];          push @$tab, [
453      $allfams->{$_}="$_ : " . $fig->family_function($_);                          &protein_link($peg),
454     } $fig->families_for_protein($peg);                          scalar($fig->function_of($peg)),
455                            "<a href=\'proteinfamilies.cgi?family=$fam&user=$user\'>$fam</a>",
456                            $ffunc,
457                            $fig->ext_sz_family($fam),
458                            $fig->sz_family($fam),
459                        ];
460            $allfams->{$fam}="$fam :" . $fig->family_function($fam);
461    }    }
462    
463    
464    $tab=&HTML::merge_table_rows($tab, {3=>1, 4=>1});    $tab=&HTML::merge_table_rows($tab, {3=>1, 4=>1});
465    
466    my $radbut={    my $radbut={
467     "1not2"=>"In family one and NOT family two\n",     "1not2"=>"In family one and NOT family two\n",
468     "2not1"=>"In family two and NOT family one\n",     "2not1"=>"In family two and NOT family one\n",
469     "1and2"=>"In both families\n"     "1and2"=>"In both families (intersection)\n",
470       "1or2" =>"In either family (union)\n",
471     };     };
472    my $col_hdrs=['CID', 'Protein', 'Function of Proteins', 'Family', 'Family Function', 'Ext. IDs In Family'];  
473      # sort the list of families in this table but put the fig families at the beginning of the list
474      my @familylist=sort {$a cmp $b} grep {$_ !~ /^fig/} keys %$allfams;
475      unshift @familylist, sort {$a cmp $b} grep {$_ =~ /^fig/} keys %$allfams;
476    
477      my $col_hdrs=['Protein', 'Function', 'Family', 'Family Function', 'External<br>IDs In<br>Family', 'Unique<br>Proteins<br>In Family'];
478    push @$html, &HTML::make_table($col_hdrs, $tab, ""),  "\n", $cgi->p("To differentiate families in this table, please choose two families:"),    push @$html, &HTML::make_table($col_hdrs, $tab, ""),  "\n", $cgi->p("To differentiate families in this table, please choose two families:"),
479    "Family 1: &nbsp; ", $cgi->popup_menu(-name=>"family1", -values=>[sort {$a cmp $b} keys %$allfams], -labels=>$allfams, -default=>[sort {$a cmp $b} keys %$allfams]->[0]),    "Family 1: &nbsp; ", $cgi->popup_menu(-name=>"family1", -values=>\@familylist, -labels=>$allfams, -default=>$familylist[0]),
480    " <br /> Family 2: &nbsp; ", $cgi->popup_menu(-name=>"family2", -values=>[sort {$a cmp $b} keys %$allfams], -labels=>$allfams, -default=>[sort {$a cmp $b} keys %$allfams]->[1]),    " <br /> Family 2: &nbsp; ", $cgi->popup_menu(-name=>"family2", -values=>\@familylist, -labels=>$allfams, -default=>$familylist[1]),
481    $cgi->p("Show proteins:<br /><ul>\n",    $cgi->p("Show proteins:<br /><ul>\n",
482    $cgi->radio_group(-name=>"diff", -values=>[keys %$radbut], -labels=>$radbut, -rows=>3),    $cgi->radio_group(-name=>"diff", -values=>[keys %$radbut], -labels=>$radbut, -rows=>4),
483    "</ul>\n"),    "</ul>\n"),
484    $cgi->hidden(-name=>'prot', -value=>$peg),    $cgi->hidden(-name=>'prot', -value=>$peg),
485    $cgi->submit(-name=>"differentiate", -value=>"Differentiate these families"), $cgi->reset, $cgi->end_form();    $cgi->submit(-name=>"differentiate", -value=>"Compare these families"), $cgi->reset, $cgi->end_form();
  }  
486  }  }
487    
488  #  $cgi->p("I am most interested in proteins from ", &choose_focus($cgi), " that are missing from these families\n"),    # initially I added this option in, with appropriate help text, but then I added the union option, and I think that surplants this, so I removed it!
489      # $cgi->a({class=>"help", onMouseover=>"javascript:if(!this.tooltip) this.tooltip=new Popup_Tooltip(this, 'Help', 'The table will show only those proteins from one of the families that contains proteins that could be in the other family but are not. Most likely this will only be a few proteins, but the table above shows that there are a large number of proteins in one family but not another. Many of these are proteins that are missing from the database. Checking this box will show all the proteins from the chosen families, not just those with missing proteins. By default, you should not check this box.', ''); this.tooltip.addHandler(); return false;", href=>"Html/ProteinFamilies.html"}, "Help"),
490      # "Show all correspondences, not just those with missing proteins: ", $cgi->checkbox(-name=>"show", -value=>"all", -label=>""),
491    
492    
493  sub differentiate {  sub differentiate {
494   ($fig, $cgi, $html)=@_;   ($fig, $cgi, $html)=@_;
# Line 523  Line 506 
506   }   }
507   my ($fam1, $fam2)=([$fig->ids_in_family($fam_id1)], [$fig->ids_in_family($fam_id2)]);   my ($fam1, $fam2)=([$fig->ids_in_family($fam_id1)], [$fig->ids_in_family($fam_id2)]);
508    
509     # figure out our families
510   # figure out all the families we know about   my $peg=$cgi->param('prot');
511   my %families;   my @families=sort {$a cmp $b} grep {$_ ne $fam_id1} grep {$_ ne $fam_id2} $fig->families_for_protein($peg);
  #map  
  #{  
  # my $cid=$_;  
  # map  
  # {  
  #  $families{$_}=1;  
  # }  
  # $fig->in_family($cid);  
  #} @{&set_utilities::union($fam1, $fam2)};  
   
  map {$families{$_}=1} $fig->in_family($cgi->param('querycid'));  
   
  my @families=sort {$a cmp $b} grep {!/$fam_id1/} grep {!/$fam_id2/} keys %families;  
512   unshift @families, ($fam_id1, $fam_id2);   unshift @families, ($fam_id1, $fam_id2);
513   my @source=@families;   my @source=@families;
514   map {/^(.*?)\|/; $_=$1} @source;   map {/^(.*?)\|/; $_=$1} @source;
515    
516   # now figure out all the external IDs in those families   # now figure out all the external IDs in those families
517   my $extids;   my $extids;
518   map   foreach my $fam (@families)
  {  
   my $fam=$_;  
   map  
519    {    {
520     $extids->{$_}->{$fam}=1;    map {$extids->{$_}->{$fam}=1} $fig->ext_ids_in_family($fam);
521    } $fig->ext_ids_in_family($fam);   }
  } @families;  
522    
523   # finally generate the table. Note that there are three different arrays that we operate on depending on the user input   # finally generate the table. Note that there are three different arrays that we operate on depending on the user input
524   # but it really only changes which set algorith we use. Each array is handled identically.   # but it really only changes which set algorith we use. Each array is handled identically.
525   my $tab;   my $tab;
526   map  
527     # This block of code is the one that really decides what the data is that is analyzed.
528     # $fam1 and $fam2 are references to arrays containing all the CIDs in each family. We combine them with intersection, union, or differences
529     # and then process the remaining IDs.
530    
531     my $set=[];
532     if ($cgi->param("diff") eq "1and2")
533     {
534            $set=&set_utilities::intersection($fam1, $fam2);
535     }
536     elsif ($cgi->param("diff") eq "1not2")
537     {
538            $set=&set_utilities::set_diff($fam1, $fam2);
539     }
540     elsif ($cgi->param("diff") eq "2not1")
541   {   {
542    my $cid=$_;          $set=&set_utilities::set_diff($fam2, $fam1);
543    my $row=["<a href='proteinfamilies.cgi?prot=$cid'>$cid</a>"];   }
544     elsif ($cgi->param("diff") eq "1or2")
545     {
546            $set=&set_utilities::union($fam1, $fam2);
547     }
548    
549     print STDERR "FAMILIES: ", join(" ", @families), "\n";
550     foreach my $cid (@$set)
551     {
552      #my $row=["<a href='proteinfamilies.cgi?prot=$cid'>$cid</a>"];
553      my $row=[];
554    my $seen; my $mismatchcolor;    my $seen; my $mismatchcolor;
555    map    foreach my $prot (sort $fig->cid_to_prots($cid))
556    {    {
557     my $prot=$_;     for (my $i=0; $i<=$#families; $i++)
    map  
558     {     {
559      # add the protein info to the cell in the table if it this family has that protein. Note that we have seen it, and increment the column counter      # add the protein info to the cell in the table if it this family has that protein. Note that we have seen it, and increment the column counter
560      # this if is if the protein that we are looking at is in the family for this column then add it      # this if is if the protein that we are looking at is in the family for this column then add it
561    
562      # if the protein is not added, we want to know if it has the same start characters as the family (i.e. from the same source), and note that.      # if the protein is not added, we want to know if it has the same start characters as the family (i.e. from the same source), and note that.
563      ($extids->{$prot}->{$families[$_]}) ?      if ($extids->{$prot}->{$families[$i]})
564      eval {$seen->{$prot}=1; $row->[$_+1] .= &protein_link($prot, $families[$_]) . "<br />" } :      {
565      ($prot =~ /^$source[$_]/) ? eval          $seen->{$prot}=1;
566      {$mismatchcolor->{$_+1}=1; $row->[$_+1] .= &protein_link($prot, $families[$_]) . "<br />" } :          $row->[$i] .= &protein_link($prot, 1, $families[$i]) . "<br />";
567      1;      }
568     } (0 .. $#families);      elsif ($prot =~ /^$source[$i]/)
569    } sort $fig->cid_to_prots($cid);      {
570    unless ($#$row == $#families+1) {$#$row=$#families+1}          $mismatchcolor->{$i}=1;
571            $row->[$i] .= &protein_link($prot, 1, $families[$i]) . "<br />";
572        }
573       }
574      }
575    
576    #$row->[1] = [join("<br />", map {&protein_link($_)} grep {/^$focus/} grep {!$seen->{$_}} $fig->cid_to_prots($cid)), "td style='background-color: #FF3366'"];    unless ($#$row == $#families) {$#$row=$#families}
   #$row->[1] = [join("<br />", map {&protein_link($_)} grep {/^$focus/} grep {!$mismatch->{$_}} grep {!$seen->{$_}} $fig->cid_to_prots($cid)), "td style='background-color: #FF3366'"];  
577    
578      # color those cells that have a mismatch. note that this colors the whole cell even if there is more than one protein mismatching
579    map {$row->[$_] = [$row->[$_], "td style='background-color: #FF3366'"]} keys %$mismatchcolor;    map {$row->[$_] = [$row->[$_], "td style='background-color: #FF3366'"]} keys %$mismatchcolor;
580      # change empty cells
581    map {$row->[$_] = " &nbsp; " unless ($row->[$_])} (0 .. $#$row);    map {$row->[$_] = " &nbsp; " unless ($row->[$_])} (0 .. $#$row);
   #push @$tab, $row if ($row->[1]->[0]);  
582    
583    # if we want to show everything do so, otherwise only show the rows where there is a missing protein    # if we want to show everything do so, otherwise only show the rows where there is a missing protein
584    if ($cgi->param("diff") eq "1and2" || $cgi->param('show') eq "all")    if (($cgi->param("diff") eq "1and2") || ($cgi->param("diff") eq "1or2") || ($cgi->param('show') eq "all"))
585    {    {
586     push @$tab, $row;     push @$tab, $row;
587    }    }
588    elsif ($cgi->param("diff") eq "1not2" && $row->[2] ne " &nbsp; ")    elsif ($cgi->param("diff") eq "1not2" && $row->[1] ne " &nbsp; ")
589    {    {
590     push @$tab, $row;     push @$tab, $row;
591    }    }
592    elsif ($cgi->param("diff") eq "2not1" && $row->[1] ne " &nbsp; ")    elsif ($cgi->param("diff") eq "2not1" && $row->[0] ne " &nbsp; ")
593    {    {
594     #($row->[1], $row->[2])=($row->[2], $row->[1]);     #($row->[1], $row->[2])=($row->[2], $row->[1]);
595     push @$tab, $row;     push @$tab, $row;
596    }    }
597   }   }
  ($cgi->param("diff") eq "1and2") ? @{&set_utilities::intersection($fam1, $fam2)} :  
  ($cgi->param("diff") eq "1not2") ? @{&set_utilities::set_diff($fam1, $fam2)} :  
  ($cgi->param("diff") eq "2not1") ? @{&set_utilities::set_diff($fam2, $fam1)} : ();  
598    
599    
600   my $title="Proteins in ";   #generate the titles
601     my $title;
602   ($cgi->param("diff") eq "1and2") ? $title.=$fig->family_function($fam_id1). " ($fam_id1) AND in " . $fig->family_function($fam_id2). " ($fam_id2)\n" :   ($cgi->param("diff") eq "1and2") ? $title.=$fig->family_function($fam_id1). " ($fam_id1) AND in " . $fig->family_function($fam_id2). " ($fam_id2)\n" :
603     ($cgi->param("diff") eq "1or2") ? $title.=$fig->family_function($fam_id1). " ($fam_id1) OR in " . $fig->family_function($fam_id2). " ($fam_id2)\n" :
604   ($cgi->param("diff") eq "1not2") ? $title.=$fig->family_function($fam_id1). " ($fam_id1) BUT NOT in " . $fig->family_function($fam_id2). " ($fam_id2)\n" :   ($cgi->param("diff") eq "1not2") ? $title.=$fig->family_function($fam_id1). " ($fam_id1) BUT NOT in " . $fig->family_function($fam_id2). " ($fam_id2)\n" :
605   ($cgi->param("diff") eq "2not1") ? $title.=$fig->family_function($fam_id2). " ($fam_id2) BUT NOT in " . $fig->family_function($fam_id1). " ($fam_id1)\n" : 1;   ($cgi->param("diff") eq "2not1") ? $title.=$fig->family_function($fam_id2). " ($fam_id2) BUT NOT in " . $fig->family_function($fam_id1). " ($fam_id1)\n" : 1;
606    
607   # add the count of the number of members in the family   push @$html, (
608   #my @colcounts=map {$column->{$_}} (1 .. $#families+2); $colcounts[0] = [$colcounts[0], "td style='background-color: #FF3366'"];      "<h3 style='text-align: center'>Comparison of proteins in $title</h3>\n",
609   #my @sz=map {$fig->ext_sz_family($_)} @families;      "<p>The table shows those proteins that are in $title. The rows show proteins with the same sequence. All the IDs along the row are different IDs from identical protein sequences. The columns of the table are different protein families. If a protein in an individual cell has a red background, that protein is either not in the family for that column, or is in that family and another family in the same database.</p>\n",
610   #unshift @$tab, [["# Ext. IDs in fam", "th "], ['', "td style='background-color: #FF3366'"], @sz];      "<p>To find out which families each protein is in, scroll the mouse over the link to the protein database. The popup window will show you the protein families for each protein. Note, however, if this protein is in the family for that column only that family is currently shown. If the window has a green background the protein is in the same family as the column as a whole. If the window has a red background the protein is in a different family than the column as a whole. The red background and the red cell color are complimentary and reinforce that these are proteins you should look at.</p>\n",
611   #unshift @$tab, [["Ext. IDs shown", "th "], @colcounts];      );
   
  # remove empty columns from the table  
  #for (my $y=$#families+2; $y>3; $y--)  
  #{  
   # do it backwards so the splice still works, and stop at 3 since the first four columns are the ones we need  
   #next if ($column->{$y});  
   #splice(@families, $y-2, 1);  
   #map {splice(@$_, $y, 1)} @$tab;  
  #}  
612    
613    
  #my @headers=map {$_ = "$_: <a href='proteinfamilies.cgi?family=$families[$_]'>$source[$_]</a>"} (0..$#families);  
614   my @headers=@families;   my @headers=@families;
  #($cgi->param("diff") eq "2not1") ? ($headers[0], $headers[1])=($headers[1], $headers[0]) : 1;  
615   map {$_ = "<a " . FIGjs::mouseover("Column Family", $fig->family_function($_) . " ($_)", '') . " href='proteinfamilies.cgi?family=$_'>$_</a>"} @headers;   map {$_ = "<a " . FIGjs::mouseover("Column Family", $fig->family_function($_) . " ($_)", '') . " href='proteinfamilies.cgi?family=$_'>$_</a>"} @headers;
  #my $colhdrs=["CID", ["Proteins not in families", "th style='background-color: #FF3366'"], @headers];  
  my $colhdrs=["CID", @headers];  
616   if ($tab)   if ($tab)
617   {   {
618    push @$html, HTML::make_table($colhdrs, $tab, $title);    push @$html, HTML::make_table(\@headers, $tab, "Proteins In $title");
619      push @$html, "<p> &nbsp; </p>\n"; # this is filler to shift the page down a little and allow room for the mouseover
620   }   }
621   else   else
622   {   {
623    push @$html, $cgi->h3("<div style='color: blue; text-align: center'><p>Sorry there were no protein families that satisfied looking for</p>\n<p>$title</p></span>");    my $sorry="<p>Sorry there were no protein families that satisfied looking for</p>\n<p>$title</p>";
624   }    if (($cgi->param("diff") eq "1not2") || ($cgi->param("diff") eq "2not1")) {$sorry .= "<p>and had candidate proteins that could be in those families</p>"}
625  }    push @$html, $cgi->h3("<div style='color: blue; text-align: center'>$sorry</span>");
   
   
 sub old_differentiate {  
  my ($fig, $cgi, $html)=@_;  
   
  my $focus=$cgi->param('focus') or 'all'; # these are the things that we are interested in  
  undef $focus if ($focus eq "all");  
   
  my ($fam_id1, $fam_id2)=($cgi->param('family1'), $cgi->param('family2'));  
  if ($fam_id1 eq $fam_id2) {  
   push @$html, "<h2 style='color: red'>Please choose two different protein families</h2>";  
   return;  
  }  
  my ($fam1, $fam2)=([$fig->ids_in_family($fam_id1)], [$fig->ids_in_family($fam_id2)]);  
   
  if ($cgi->param("1not2")) {  
   my $tab;  
   map {  
     my $cid=$_;  
     map {push @$tab, [$cid, $_, &functionate($_)]} sort grep {/^$focus/} $fig->cid_to_prots($cid)  
     } sort @{&set_utilities::set_diff($fam1, $fam2)};  
   
   $tab=&HTML::merge_table_rows($tab);  
   push @$html, $cgi->h3("Proteins that are in $fam_id1 (" , $fig->family_function($fam_id1) , ") and not in $fam_id2 (" ,  
           $fig->family_function($fam_id2) . ")\n"), &HTML::make_table(["CID", "Protein ID", "Function"], $tab, "");  
626    }    }
   
   
  if ($cgi->param("2not1")) {  
   
    ###########NOTICE THIS REDIRECT!!!  
    ($fam_id1, $fam_id2)=($fam_id2, $fam_id1);  
    ($fam1, $fam2)=($fam2, $fam1);  
    $cgi->param("1not2c", "1");  
627    }    }
   if (0) {  
   my $tab;  
   map {  
     my $cid=$_;  
     map {push @$tab, [$cid, $_, &functionate($_)]} sort grep {/^$focus/} $fig->cid_to_prots($cid)  
     } sort @{&set_utilities::set_diff($fam2, $fam1)};  
628    
629    $tab=&HTML::merge_table_rows($tab);  =head2 protein_link()
   push @$html, $cgi->hr, $cgi->h3("Proteins that are in $fam_id2 (" , $fig->family_function($fam_id2) , ") and not in $fam_id1 (" ,  
           $fig->family_function($fam_id1) . ")\n"), &HTML::make_table(["CID", "Protein ID", "Function"], $tab, "");  
  }  
630    
631   if ($cgi->param("1and2")) {  This takes a protein ID and returns the link (full link including ID) back to the appropriate database.
   my $tab;  
   my @evidence;  
   map {  
     my $cid=$_;  
     #push @evidence, &compare_two_cids($cgi->param('querycid'), $cid);  
     push @evidence, &IHatePfams($cgi->param('querycid'), $cid);  
     map {push @$tab, [$cid, $_, &functionate($_)]} sort grep {/^$focus/} $fig->cid_to_prots($cid)  
     } @{&set_utilities::intersection($fam1, $fam2)};  
   
   $tab=&HTML::merge_table_rows($tab);  
   push @$html, $cgi->hr, $cgi->h3("Proteins that are in both $fam_id1 (" , $fig->family_function($fam_id1) , ") and in $fam_id2 (" ,  
           $fig->family_function($fam_id2) . ")\n"), &HTML::make_table(["CID", "Protein ID", "Function"], $tab, ""),  
           $cgi->hr, $cgi->h3("Evidence for the connections between proteins and the families they are in"),  
           $cgi->p("These tables show the connections between the families. In the first row of each table is the initial family that you chose, and in the second row is the comparator. The first column is the CID and the remaining columns are the protein families that these two CIDs are in. In each cell are the external IDs that are (a) in the CIDs per the row headers AND (b) in the families per column headers. Thus, if you are here from <a href=\"http://listeria.uchicago.edu/dev/FIG/proteinfamilies.cgi?querycid=965112&family1=fig%7CPF002033&family2=mcl%7CORTHOMCL676&focus=all&1and2=on&differentiate=Differentiate+these+families&.cgifields=1not2&.cgifields=2not1&.cgifields=1and2\">this link</a> cog|thrB is in the family cog|COG0083 and has the ID 965112 and sp|P00547 has the ID 965112 and is in the families pfam|PB000121, pfam|PB007585, pfam|PF00288.12, pir|PIRSF000676, sp|PS00627, and tigr|TIGR00191"),  
           @evidence;  
  }  
   
  if ($cgi->param("1and2b")) {  
   # figure out all the families we know about  
   my %families;  
   map  
   {  
    my $cid=$_;  
    map  
    {  
     $families{$_}=1;  
    }  
    $fig->in_family($cid);  
   } @{&set_utilities::intersection($fam1, $fam2)};  
   my @families=sort {$a cmp $b} keys %families;  
632    
633    # now figure out all the external IDs in those families  If addmouseover is true then a mouseover will be added showing the families the peg is in
   my $extids;  
   map  
   {  
    my $fam=$_;  
    map  
    {  
     $extids->{$_}->{$fam}=1;  
    } $fig->ext_ids_in_family($fam);  
   } @families;  
634    
635    my $tab;  If $fam is provided it will be the header of the mouseover popup.
   map  
   {  
    my $cid=$_;  
    map  
    {  
     my $prot=$_;  
     my $row=[$cid, $prot, &functionate($prot)];  
     map {($extids->{$prot}->{$_}) ? push @$row, $prot : push @$row, " &nbsp; "} @families;  
     push @$tab, $row;  
    } sort grep {/^$focus/} $fig->cid_to_prots($cid)  
   } @{&set_utilities::intersection($fam1, $fam2)};  
636    
637    my $skip;  =cut
   map {$skip->{$_+4}=1} (0 .. $#families);  
   $tab=&HTML::merge_table_rows($tab, $skip);  
638    
639    map {$_ = "<a href='proteinfamilies.cgi?family=$_'>$_</a>"} @families;  sub protein_link {
640    my $colhdrs=["CID", "Protein", "Function", @families];   my ($p, $addmouseover, $fam) =@_;
641    my $title="Proteins in ". $fig->family_function($fam_id1). " ($fam_id1) AND in " . $fig->family_function($fam_id2). " ($fam_id2)\n";#$fam_id1, $fam_id2   my %proteinbase=(
642    push @$html, HTML::make_table($colhdrs, $tab, $title);    "fig"         => "protein.cgi?user=$user&prot=fig|",
643   }    "cog"  => "http://www.ncbi.nlm.nih.gov/COG/old/palox.cgi?",
644      "sp"   => "http://www.expasy.org/uniprot/",
645      "tr"   => "http://www.expasy.org/uniprot/",
646      "kegg" => "http://www.genome.jp/dbget-bin/www_bget?",
647     );
648    
649   if ($cgi->param("1and2c")) {   my $mouseovertitle="Protein Families";
650    # figure out all the families we know about   if ($fam)
   my %families;  
   map  
   {  
    my $cid=$_;  
    map  
651     {     {
652      $families{$_}=1;          $mouseovertitle="<i>Column family: " . $fig->family_function($fam) . " ($fam)</i>";
653     }     }
654     $fig->in_family($cid);   my $familiesforp = "<b>Families for $p:</b><br>";
655    } @{&set_utilities::intersection($fam1, $fam2)};   my ($hcolor, $bgcolor)=('#11AA66','#BBFFBB'); # text background color.
   my @families=sort {$a cmp $b} grep {!/$fam_id1/} grep {!/$fam_id2/} keys %families;  
   unshift @families, ($fam_id1, $fam_id2);  
656    
657    # now figure out all the external IDs in those families   # if the protein is in our family of interest show just that, otherwise show all the families
658    my $extids;   my @thisfam=$fig->ext_family_for_id($p);
659    map   if (grep {$_ eq $fam} @thisfam)
660    {    {
661     my $fam=$_;          $familiesforp .= $fig->family_function($fam) . " ($fam)";
    map  
    {  
     $extids->{$_}->{$fam}=1;  
    } $fig->ext_ids_in_family($fam);  
   } @families;  
   
   my $tab;  
   map  
   {  
    my $cid=$_;  
    my $row=[$cid];  
    map  
    {  
     my $prot=$_;  
     map {($extids->{$prot}->{$families[$_]}) ? $row->[$_+1] = $prot : 1 } (0 .. $#families);  
    } sort grep {/^$focus/} $fig->cid_to_prots($cid);  
    unless ($#$row == $#families+1) {$#$row=$#families+1}  
    push @$tab, $row;  
   } @{&set_utilities::intersection($fam1, $fam2)};  
   
   $tab=&HTML::merge_table_rows($tab);  
   
   map {$_ = "<a href='proteinfamilies.cgi?family=$_'>$_</a>"} @families;  
   my $colhdrs=["CID", @families];  
   my $title="Proteins in ". $fig->family_function($fam_id1). " ($fam_id1) AND in " . $fig->family_function($fam_id2). " ($fam_id2)\n";#$fam_id1, $fam_id2  
   push @$html, HTML::make_table($colhdrs, $tab, $title);  
662   }   }
663     elsif (scalar(@thisfam))
  if ($cgi->param("1not2c")) {  
   # figure out all the families we know about  
   my %families;  
   map  
664    {    {
665     my $cid=$_;          $familiesforp .= "<ul>" . join("", map {"<li> " . $fig->family_function($_) . " ($_)</li>"} @thisfam) . "</ul>";
666     map          if (!$fam) {($hcolor, $bgcolor)=('','')} # use the default colors
667     {          else {($hcolor, $bgcolor)=('#CC0000', '#FF3366')} # we're doing a comparison and the families are different so color them red
     $families{$_}=1;  
668     }     }
669     $fig->in_family($cid);   else
   } @{&set_utilities::set_diff($fam1, $fam2)};  
   my @families=sort {$a cmp $b} grep {!/$fam_id1/} grep {!/$fam_id2/} keys %families;  
   unshift @families, ($fam_id1, $fam_id2);  
   
   # now figure out all the external IDs in those families  
   my $extids;  
   map  
   {  
    my $fam=$_;  
    map  
    {  
     $extids->{$_}->{$fam}=1;  
    } $fig->ext_ids_in_family($fam);  
   } @families;  
   
   my $tab;  
   map  
   {  
    my $cid=$_;  
    my $row=["<a href='proteinfamilies.cgi?prot=$cid'>CID $cid</a>"];  
    my $seen;  
    map  
    {  
     my $prot=$_;  
     my $column;  
     map  
670      {      {
671       ($extids->{$prot}->{$families[$_]}) ?          $familiesforp="<b>Families for $p:</b><br>No protein families";
      eval {$seen->{$prot}=1; $row->[$_+2] = &protein_link($prot) ; $column->{$_+2}++} :  
      1  
     } (0 .. $#families);  
    } sort grep {/^$focus/} $fig->cid_to_prots($cid);  
    unless ($#$row == $#families+2) {$#$row=$#families+2}  
   
    $row->[1] = [join("<br />", map {&protein_link($_, $families[0])} grep {!$seen->{$_}} $fig->cid_to_prots($cid)), "td style='background-color: #FF3366'"];  
   
    push @$tab, $row;  
   } @{&set_utilities::set_diff($fam1, $fam2)};  
   
   $tab=&HTML::merge_table_rows($tab);  
   
   map {$_ = "<a href='proteinfamilies.cgi?family=$_'>$_</a>"} @families;  
   my $colhdrs=["CID", "<span style='background-color: #FF3366'>Proteins not one of the families listed</span>", @families];  
   my $title="Proteins in ". $fig->family_function($fam_id1). " ($fam_id1) BUT NOT in " . $fig->family_function($fam_id2). " ($fam_id2)\n";#$fam_id1, $fam_id2  
   push @$html, HTML::make_table($colhdrs, $tab, $title);  
  }  
   
   
672  }  }
673    
674     foreach my $key (keys %proteinbase)
   
 sub functionate {  
  my $peg=shift;  
  return scalar($fig->function_of($peg));  
 }  
   
   
 sub compare_two_cids {  
  my ($id1, $id2)=@_;  
  # which families is ID in and why?  
  # going via the external IDs, although I am not convinced we need to do this  
  my $family;  
  map {  
   my $id=$_;  
   map  
675    {    {
676     map {$family->{$_}->{$id}++} $fig->ext_family_for_id($_)          if ($p =~ /^$key/ && $addmouseover)
   } $fig->cid_to_prots($id);  
  } ($id1, $id2);  
   
  # find the families that have both id1 and id2  
  my $tab;  
  map  
677   {   {
678    push @$tab, [$_, $family->{$_}->{$id1}, $family->{$_}->{$id2}] if ( $family->{$_}->{$id1} &&  $family->{$_}->{$id2} );                  $p =~ s/^(.*?)\|//;
679   } keys %$family;                  $p = "<a " . FIGjs::mouseover($mouseovertitle, $familiesforp, '', '1', $hcolor, $bgcolor) . " href='$proteinbase{$key}$p'>$1|$p</a>";
   
  return &HTML::make_table(["Family", "Cnx to $id1", "Cnx to $id2"], $tab, "Connections (cnx) between proteins and families"), $cgi->p("\n");  
680  }  }
681            elsif ($p =~ /^$key/)
   
 sub IHatePfams {  
  my ($id1, $id2)=@_;  
  # id1 and id2 are cids  
   
  # what families are the cids in?  
  my %families;  
  map {$families{$_}=1} ($fig->in_family($id1), $fig->in_family($id2));  
  my @families=sort {$a cmp $b} keys %families;  
   
  # what external IDs are the cids  
  my %external;  
  map {$external{$_}=$id1} $fig->cid_to_prots($id1);  
  map {$external{$_}=$id2} $fig->cid_to_prots($id2);  
   
   
  my $tab=[];  
  foreach my $cid ($id1, $id2)  
  {  
   my $row=[$cid];  
   foreach my $fam (@families) {  
    # which proteins are in $fam  
    my $cell;  
    foreach my $id ($fig->ext_ids_in_family($fam))  
682     {     {
683      $cell .= "$id<br>" if ($external{$id} eq $cid);                  $p =~ s/^(.*?)\|//;
684     }                  $p = "<a href='$proteinbase{$key}$p'>$1|$p</a>";
    $cell = " &nbsp; " unless ($cell);  
    push @$row, $cell;  
   }  
   push @$tab, $row;  
  }  
   
  &HTML::make_table(["CID", @families], $tab, "Proteins in families");  
685  }  }
   
   
   
 sub protein_link {  
  my ($p, $fam) =@_;  
  my %proteinbase=(  
   "fig"         => "protein.cgi?user=$user&prot=fig|",  
   "cog"  => "http://www.ncbi.nlm.nih.gov/COG/old/palox.cgi?",  
   "sp"   => "http://www.expasy.org/uniprot/",  
   "tr"   => "http://www.expasy.org/uniprot/",  
   "kegg" => "http://www.genome.jp/dbget-bin/www_bget?",  
  );  
   
  my $mouseovertitle="Protein Families";  
  if ($fam)  
  {  
   $mouseovertitle="<i>Column: " . $fig->family_function($fam) . " ($fam)</i>";  
686   }   }
  my $familiesforp = join("<br />", map {$_ . " : ". $fig->family_function($_)} $fig->ext_family_for_id($p));  
  unless ($familiesforp) {$familiesforp="No protein family"} # this is so there is something (whitespace) to print  
  map {($p =~ /^$_/) ? eval {$p =~ s/^(.*?)\|//; $p = "<a " . FIGjs::mouseover($mouseovertitle, $familiesforp, '') . "href='$proteinbase{$_}$p'>$1|$p</a>"} : 1;} keys %proteinbase;  
687   return $p;   return $p;
688  }  }
689    

Legend:
Removed from v.1.26  
changed lines
  Added in v.1.27

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3