[Bio] / FigWebServices / proteinfamilies.cgi Repository:
ViewVC logotype

Diff of /FigWebServices/proteinfamilies.cgi

Parent Directory | Revision Log | View Patch

revision 1.9, Thu Jul 14 05:16:53 2005 UTC revision 1.10, Thu Jul 14 20:40:22 2005 UTC
# Line 83  Line 83 
83  elsif ($cgi->param('extend_family')) {  elsif ($cgi->param('extend_family')) {
84   &extend_family($fig,$cgi,$html);   &extend_family($fig,$cgi,$html);
85  }  }
86    elsif ($cgi->param('reverse_analyse_family')) {
87     &reverse_analyse_family($fig,$cgi,$html);
88    }
89    elsif ($cgi->param('analyse_family')) {
90     &analyse_family($fig,$cgi,$html);
91    }
92  elsif ($cgi->param('family'))  elsif ($cgi->param('family'))
93  {  {
94   &show_family($fig,$cgi,$html);   &show_family($fig,$cgi,$html);
# Line 137  Line 143 
143    "<p>$peg is in the following ", scalar(@families), " families. Please choose one or more families using the checkboxes</p>\n",    "<p>$peg is in the following ", scalar(@families), " families. Please choose one or more families using the checkboxes</p>\n",
144    &HTML::make_table($col_hdrs, $tab, "Families for $peg"),  "\n",    &HTML::make_table($col_hdrs, $tab, "Families for $peg"),  "\n",
145    $cgi->submit('Show Proteins In Each Family'),    $cgi->submit('Show Proteins In Each Family'),
146    $cgi->submit(-name=>'proteins_between_two', -value=>"Compare two or more families for missing"), "<br>\n",    $cgi->submit(-name=>'analyse_family', -value=>"Show Proteins that are in family"),
147    $cgi->submit(-name=>'extend_family', -value=>"Extend and contract family"),    $cgi->submit(-name=>'reverse_analyse_family', -value=>"Show Proteins that are NOT in family"),
   $cgi->submit("Combine Families With And"), $cgi->submit("Compare FIG Functions"),  
148    $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>"allfams", -value=>\@families), "\n",    $cgi->hidden(-name=>'prot'),$cgi->hidden(-name=>"allfams", -value=>\@families), "\n",
149    $cgi->reset, $cgi->end_form;    $cgi->reset, $cgi->end_form;
150   }   }
# Line 147  Line 152 
152    
153  # this was deleted from above  # this was deleted from above
154  #$cgi->submit(-name=>"Suggest", -value=>"Suggest Additional families for this protein"),  #$cgi->submit(-name=>"Suggest", -value=>"Suggest Additional families for this protein"),
155      #$cgi->submit(-name=>'proteins_between_two', -value=>"Compare two or more families for missing"), "<br>\n",
156      #$cgi->submit("Combine Families With And"), $cgi->submit("Compare FIG Functions"),
157    
158  sub show_family {  sub show_family {
159   my ($fig,$cgi,$html)=@_;   my ($fig,$cgi,$html)=@_;
# Line 392  Line 399 
399    
400    
401    
402    sub analyse_family {
403     my ($fig,$cgi,$html)=@_;
404    # here are the questions:
405    # 1. Given a column in a spreadsheet:
406    # 2. Here are the proteins in that column
407    # 3. For each protein, here are the families that they are in. How many families are unique and how many families is every protein in?
408    #       if we start with a column of 10 proteins, and nine of them are all in the same families and one is not, we want to exclude the one and keep the nine.
409    #       so we recommend that a protein be removed from a family.
410    # 4. For each of the families that are good, which proteins are there in some/most of the families that are not in the column that we are looking at
411    # 5. For each of the families that are good, which proteins are only in one of those families and not in any others?
412    
413    # Note that column == family, But start with fig and then  allow a replace ID feature like before.
414    
415     #my $focus=$cgi->param('focus') or "fig"; # these are the things that we are interested in
416     my $focus='fig';
417     push @$html, "<h2>Testing: Limited to fig</h2>\n";
418     foreach my $col (grep {$cgi->param($_)} $cgi->param("allfams"))
419     {
420      # $col is the column in the spreadsheet. This is really a family, but to visualize and code this I am doing it in a FIG-centric way
421      my %proteins_in_col;
422      map {$proteins_in_col{$_}=1} $fig->ids_in_family($col);
423    
424      # @proteins are the proteins in that column, although these are cids and not fids at the moment
425      my $familycount;
426      foreach my $prot (keys %proteins_in_col) {
427       foreach my $fam ($fig->in_family($prot)) {
428        $familycount->{$fam}++;
429       }
430      }
431    
432      my $count_of;
433      my $fams;
434      foreach my $f (sort {$familycount->{$b} <=> $familycount->{$a}} keys %$familycount) {
435       next if ($f eq $col);
436       # It seems that $sz_family is not right
437       map {$fams->{$f}++; $count_of->{$_}->{$f}++}
438            grep {/^$focus/}
439            map {$fig->cid_to_prots($_)}
440            grep {$proteins_in_col{$_}}
441            ($fig->ids_in_family($f));
442      }
443      my $tab=[];
444    
445      # create a list of families that we know about
446    
447      foreach my $fid (sort {scalar(keys %{$count_of->{$b}}) <=> scalar(keys %{$count_of->{$a}})} keys %$count_of) {
448       my @row=($fid, scalar(keys %{$count_of->{$fid}}));
449       print STDERR "Added $fid\n";
450       foreach my $fam (sort {$fams->{$b} <=> $fams->{$a}} keys %$fams) {
451        $count_of->{$fid}->{$fam} ? push @row, [$count_of->{$fid}->{$fam}, "td style='background-color: grey'"] : push @row, " &nbsp ";
452       }
453       push @$tab, \@row;
454      }
455    
456      push @$html, "<h3>Proteins that ARE from ", $fig->family_function($col), " ($col) </h3>\n",
457      &HTML::make_table(["Protein ID", "Number of fams protein is in", (keys %$fams)], $tab,' &nbsp; ');
458     }
459    }
460    
461    
462    
# reverse_analyse_family($fig, $cgi, $html)
#
# For every family that was ticked on the form (each name listed in the
# "allfams" parameter whose own CGI parameter is true), look at the other
# families that its members belong to and build a table of the proteins in
# those families that are NOT members of the selected family.  This is the
# counterpart of analyse_family, which reports the proteins that ARE members.
#
# Parameters:
#   $fig  - object providing ids_in_family, in_family, cid_to_prots and
#           family_function
#   $cgi  - CGI query object; "allfams" and the per-family checkboxes are read
#   $html - array reference; the generated HTML fragments are pushed onto it
#
# The return value is not meaningful; all output goes into @$html.
#
# Here are the questions:
# 1. Given a column in a spreadsheet:
# 2. Here are the proteins in that column
# 3. For each protein, here are the families that they are in. How many
#    families are unique and how many families is every protein in?
#       if we start with a column of 10 proteins, and nine of them are all in
#       the same families and one is not, we want to exclude the one and keep
#       the nine. So we recommend that a protein be removed from a family.
# 4. For each of the families that are good, which proteins are there in
#    some/most of the families that are not in the column that we are looking at
# 5. For each of the families that are good, which proteins are only in one of
#    those families and not in any others?
#
# Note that column == family, but start with fig and then allow a replace ID
# feature like before.
sub reverse_analyse_family {
 my ($fig,$cgi,$html)=@_;

 #my $focus=$cgi->param('focus') or "fig"; # these are the things that we are interested in
 my $focus='fig';
 push @$html, "<h2>Testing: Limited to fig</h2>\n";
 foreach my $col (grep {$cgi->param($_)} $cgi->param("allfams"))
 {
  # $col is the column in the spreadsheet. This is really a family, but to
  # visualize and code this it is done in a FIG-centric way.
  # The keys are the ids in that column (cids, not fids, at the moment).
  my %proteins_in_col = map {$_ => 1} $fig->ids_in_family($col);

  # how many of the column's proteins are in each family
  my %familycount;
  foreach my $prot (keys %proteins_in_col) {
   $familycount{$_}++ foreach $fig->in_family($prot);
  }

  # %fams:     family => number of focus proteins contributed by NON-members
  # %count_of: protein id => { family => count } for every other family
  my (%count_of, %fams);
  foreach my $f (sort {$familycount{$b} <=> $familycount{$a}} keys %familycount) {
   next if ($f eq $col);
   # It seems that $sz_family is not right
   foreach my $cid (grep {!$proteins_in_col{$_}} $fig->ids_in_family($f)) {
    # \Q..\E so that the prefix is matched literally, never as a pattern
    foreach my $fid (grep {/^\Q$focus\E/} $fig->cid_to_prots($cid)) {
     $fams{$f}++;
     $count_of{$fid}->{$f}++;
    }
   }
  }

  # Decide the column order once and use it for the header AND for every row,
  # otherwise the headings do not line up with the cells beneath them.
  # Ties are broken by name so that the output is reproducible.
  my @fam_order = sort {$fams{$b} <=> $fams{$a} || $a cmp $b} keys %fams;

  my $tab=[];
  foreach my $fid (sort {scalar(keys %{$count_of{$b}}) <=> scalar(keys %{$count_of{$a}}) || $a cmp $b} keys %count_of) {
   my @row=($fid, scalar(keys %{$count_of{$fid}}));
   print STDERR "Added $fid\n"; # debugging trace, ends up in the server error log
   foreach my $fam (@fam_order) {
    if ($count_of{$fid}->{$fam}) {
     push @row, [$count_of{$fid}->{$fam}, "td style='background-color: grey'"];
    }
    else {
     push @row, " &nbsp; ";
    }
   }
   push @$tab, \@row;
  }

  push @$html, "<h3>Proteins that ARE NOT from ", $fig->family_function($col), " ($col) </h3>\n",
   &HTML::make_table(["Protein ID", "Number of fams protein is in", @fam_order], $tab, ' &nbsp; ');
 }
}
521    
522    
523    
524    

Legend:
Removed from v.1.9  
changed lines
  Added in v.1.10

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3