[Bio] / FigWebServices / ma_to_tf_pairs.cgi Repository:
ViewVC logotype

View of /FigWebServices/ma_to_tf_pairs.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (annotate)
Thu Feb 9 04:31:45 2006 UTC (13 years, 9 months ago) by mkubal
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
with orpv

# -*- perl -*-

=pod

=head1 protein_sets_2.cgi

Find transcription factors for affymetrix spot ids 

=cut

use FIG;
use HTML;
use CGI;
my $cgi=new CGI;
use LWP::Simple qw(!head); # see the caveat in perldoc LWP about importing two head methods.

$fig = new FIG;
my $html = [];

unshift(@$html, "<TITLE>Find Transcription Factors for Expressed Genes</TITLE>\n");

my $id_to_ratio;
my $inputs;
if ($cgi->param('request') ) 
{
  my $input =$cgi->param('proteins');
  my @inputs = split("\n",$input);

  if ($cgi->upload('fileupload'))
  {
     my $fh=$cgi->upload('fileupload');
     @inputs = <$fh> ;
  }

  $given = [@inputs];
  $id_to_ratio = &parse_inputs($given); 
 
}

if ($id_to_ratio && $cgi->param('request') eq "Find Transcription Factors") 
{
    &find_tfs($fig,$cgi,$html,$id_to_ratio);
}

if($cgi->param('request') eq "Find Exclusive Combinations") 
{
    &analyze_combinations($fig,$cgi,$html);
}

if ($id_to_ratio && $cgi->param('request') eq "Find Most Frequent Factors") 
{
    &find_most_frequent($fig,$cgi,$html,$tag_to_id);
}

else
{
  &show_initial($fig,$cgi,$html);
  &HTML::show_page($cgi,$html,1);
  exit;
}

sub show_initial {
 my ($fig,$cgi,$html)=@_;
 # generate a blank page
 push @$html, 
 $cgi->start_multipart_form(),
 "<p>Enter affymetrix spot id, expression ratio pairs separated by a space or a tab</p>\n",
 "<p>",
 "<b>Paste pairs here:</b><br>\n",
 $cgi->textarea(-name=>"proteins", -rows=>10, -columns=>40), "<br>\n", 
 "<br><b>Or choose a file here:</b><br>\n",
 $cgi->filefield(-name=>"fileupload", -size=>50), "<br>\n",
 $cgi->submit(-name=>'request', -value=>'Find Transcription Factors'), 
 $cgi->submit(-name=>'request', -value=>'Find Exclusive Combinations'), 
 $cgi->reset, $cgi->end_form;
 return $html;
}

sub find_tfs 
{
  #print STDERR "find_tfs called\n";
  my ($fig,$cgi,$html,$spotid_to_ratio)=@_;
  $new_html = [];
  $dir = "/home/mkubal/public_html";

  my @ids = keys(%{$spotid_to_ratio});  
  
  open(OUT2,">$dir/tfs_to_ratio.txt");
  
  open(OUT3,">$dir/refseq_in_exp.txt");  

  open(IN,"$dir/spotid_to_refseq.txt");  
  %spotid_to_refseq;
  while ($_ = <IN>){
      chomp($_);
      #print STDERR "spid to rs:$_\n";
      @temp = split("\t",$_);
      if($spotid_to_refseq{$temp[0]}){
	  $ref = $spotid_to_refseq{$temp[0]};
          push(@$ref,$temp[1]);
          ##print STDERR "old spotid:$temp[1]\n";
      }
      else{
	  $spotid_to_refseq{$temp[0]} = [$temp[1]];
          ##print STDERR "new spotid:$temp[1]\n";
      }
  }
     
  open(IN2,"$dir/refseq_to_transfactor.txt.single_and_pairs");
  %refseq_to_tf;
  while ($_ = <IN2>){
      chomp($_);
      print STDERR "refseq_to_transfactor:$_\n"; 
      @temp = split("\t",$_);
      if($refseq_to_tf{$temp[0]}){
          $ref = $refseq_to_tf{$temp[0]};
          push(@$ref,$temp[1]);
      }
      else{ 
         $refseq_to_tf{$temp[0]} = [$temp[1]];
      } 
  }
  close(IN2);
  
  push(@$new_html,"<HTML><HEAD>
  <TITLE>strep</TITLE>
  <META NAME='generator' CONTENT='YokMap 1.0.1'>
  <META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=iso-8859-1'>
  </HEAD>
  <BODY BGCOLOR='#ffffff'>");
  
  push(@$new_html,"<TABLE border><TR><TH>Expression Ratio</TH><TH>Transcription Factor(s)</TH><TH>Affymetrix Spot ID</TH><TH>RefSeq ID</TH></TR>");

  my $row_string = "";
  foreach my $id (@ids)
  {
      #print STDERR "ids here:$id\n"; 
      my $ratio = $spotid_to_ratio->{$id};
      my $refseqs = $spotid_to_refseq{$id};
      foreach $refseq (@$refseqs){
	  print OUT3 "$refseq\n";
	  
	  my $tfs_ref = $refseq_to_tf{$refseq};
	  
	  foreach $tfs (@$tfs_ref){
	      $row_string = "<TR><TD>$ratio</TD><TD>$tfs</TD><TD>$id</TD><TD>$refseq</TD></TR>";
	      push(@$new_html,$row_string);
	  }
	  
	  if($tfs_ref){
	      foreach $tfs (@$tfs_ref){
		  print OUT2 "$tfs\t$ratio\n";
	      }
	  }
      }	  

  }

  close(OUT2);
  close(OUT3);
        
  push(@$new_html,"</TABLE>");
  #push(@$new_html,
  #     "<br><br>",
  #     $cgi->submit(-name=>'request', -value=>'Find Exclusive Combinations'), 
  #     $cgi->submit(-name=>'request', -value=>'Find Most Frequent Factors'));


  &HTML::show_page($cgi,$new_html);
  exit;
}

sub analyze_combinations
{
  my ($fig,$cgi,$html)=@_;
  $new_html = [];
  %significant;
  $dir = "/home/mkubal/public_html";

  open(IN2,"$dir/refseq_to_transfactor.txt.single_and_pairs");
  %tfs_to_refseq;
  while ($_ = <IN2>){
      chomp($_);
      @temp = split("\t",$_);
      if($tfs_to_refseq{$temp[1]}){
          $ref = $tfs_to_refseq{$temp[1]};
          push(@$ref,$temp[0]);
      }
      else{ 
         $tfs_to_refseq{$temp[1]} = [$temp[0]];
      } 
  }
  close(IN2);
  
  open(IN3,"$dir/refseq_in_exp.txt");
  %refseq_in_exp;
  while ($_ = <IN3>){
      chomp($_);
      $refseq_in_exp{$_} = 1;
  }
  close(IN3);
  
  ##print STDERR "made it here\n";  
  open(IN,"$dir/tfs_to_ratio.txt");  
  %tfs_combinations;
  %tfs_counts;
  %tfs_expected;
  %tfs_total;
  open(IN3,"$dir/stats_combined_single_pairs.txt");
  open(SUMMARY,">$dir/pair_stats.summary");
  while ($_ = <IN3>){
      @temp = split("\t",$_);
      $expected = $temp[2];
      $total = $temp[1];
      chomp($expected);
      $tfs_expected{$temp[0]} = $expected;  
      $tfs_total{$temp[0]} = $total; 
  }
  close(IN3);
  
  my @lines;
  while ($_ = <IN>){
      push(@lines,$_);
      #print "input line:$_\n"; 
      chomp($_);
      @temp = split("\t",$_);
      $tfs_combinations{$temp[0]} = 1;
  }
  close(IN);
  
  @negative_exclusives;
  @positive_exclusives;
  @therest;
  
  foreach my $k (keys(%tfs_combinations)){
     ##print STDERR "k:$k\n";
     $sign = "not_set";
     $exclusive = 1; 
     foreach $l (@lines){
	 @temp = split("\t",$l);
	 $tfs = $temp[0];
         $ratio = $temp[1];
         if($k eq $tfs){
             if($tfs_counts{$k}){$tfs_counts{$k} = $tfs_counts{$k} + 1}
             else{$tfs_counts{$k} = 1}

             if($sign eq "not_set"){
		 if($ratio < 0){$sign = "negative"}
		 else{$sign = "positive"}
             }
	     else{
                 $previous_sign = $sign;
		 if($ratio < 0){$sign = "negative"}
		 else{$sign = "positive"}
                 if($previous_sign ne $sign){$exclusive =0}
             }
	 }
     }
    
     if($exclusive){
	if($sign eq "positive"){
	    push(@positive_exclusives,$k)
	}
        else{push(@negative_exclusives,$k)}
      }
     else{push(@therest,$k)} 
 }
     push(@$new_html,"<HTML><HEAD>
     <TITLE>strep</TITLE>
     <META NAME='generator' CONTENT='YokMap 1.0.1'>
     <META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=iso-8859-1'>
     </HEAD>
     <BODY BGCOLOR='#ffffff'>");
  
     push(@$new_html,"<TABLE border><TR><TH>UP Transcription Factor Combinations</TH><TH>Number of Genes</TH><TH>Observed/Expected</TH></TR>");
     
     foreach my $tfs (@positive_exclusives){
        my $count = $tfs_counts{$tfs};
        my $observed = $count * (1/311);
        my $expected = $tfs_expected{$tfs};
        my $ratio = $observed/$expected; 
        if($ratio > 10 ){ $significant{$tfs} = $ratio};
        my $row_string = "<TR><TD>$tfs</TD><TD>$count</TD><TD>$ratio</TD></TR>";
        push(@$new_html,$row_string);
        print SUMMARY "UP\t$tfs\t$observed\t$expected\t$ratio\n";
	
     } 
         
     push(@$new_html,"</TABLE>");

     push(@$new_html,"<br><br>");
  
     push(@$new_html,"<TABLE border><TR><TH>DOWN Transcription Factor Combinations</TH><TH>Number of Genes</TH><TH>Observed/Expected</TH></TR>");

     foreach my $tfs (@negative_exclusives)
     {
        my $count = $tfs_counts{$tfs};
        my $observed = $count * (1/311);
        my $expected = $tfs_expected{$tfs};
        my $ratio = $observed/$expected; 
        if($ratio > 10 ){ $significant{$tfs} = $ratio};
        my $row_string = "<TR><TD>$tfs</TD><TD>$count</TD><TD>$ratio</TD></TR>";
        push(@$new_html,$row_string);
        print SUMMARY "DOWN\t$tfs\t$observed\t$expected\t$ratio\n";
    }
  
  push(@$new_html,"</TABLE>");
 
  push(@$new_html,"<br><br>");
  
  push(@$new_html,"<TABLE border><TR><TH>MIXED Transcription Factor Combinations</TH><TH>Number of Genes</TH><TH>Observed/Expected</TH></TR>");
  
  foreach my $tfs (@therest)
  {
      my $count = $tfs_counts{$tfs};
      my $observed = $count * (1/311);
      my $expected = $tfs_expected{$tfs};
      my $ratio = $observed/$expected; 
      if($ratio > 10 ){ $significant{$tfs} = $ratio};
      my $row_string = "<TR><TD>$tfs</TD><TD>$count</TD><TD>$ratio</TD></TR>";
      push(@$new_html,$row_string);
      print SUMMARY "MIXED\t$tfs\t$observed\t$expected\t$ratio\n";
  }
        
  push(@$new_html,"</TABLE>");
  
  push(@$new_html,"<br><br>");
  
  push(@$new_html,"<TABLE border><TR><TH>SIGNIFICANT Transcription Factor Combinations</TH><TH>RefSeq in Exp</TH><TH>Other RefSeq</TH><TH>Observed/Expected</TH><TH>ORPV</TH></TR>");

  
  @sorted = sort {$b <=> $a} values(%significant);
  %sorted_hash;
  foreach $s (values(%significant)){
      $sorted_hash{$s} = 1;
  }

  @final;
  %already_in;
  foreach $s (@sorted){
      foreach $tfs (keys(%significant)){
          if($already_in{$tfs}){ $do_nothing = 1}
          else{  
	      $ratio = $significant{$tfs};
	      if( $ratio >= $s){
		  push(@final,$tfs);
		  $already_in{$tfs} = 1;    
	      } 
	  }
      }
  }
  
  foreach my $tfs (@final)
  {
      my $ids = $tfs_to_refseq{$tfs};
      @in_id_strings = ();
      @other_id_strings = (); 
      foreach $id (@$ids){
          if($refseq_in_exp{$id}){
	      $id_string ="<a href='http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=NucCore&cmd=search&term=$id'>$id</a>";
	      push(@in_id_strings,$id_string);
         }
         else {
	     $id_string ="<a href='http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=NucCore&cmd=search&term=$id'>$id</a>";
	     push(@other_id_strings,$id_string);
         }
      }
      
      $in_ids_line = join(",",@in_id_strings);
      $other_ids_line = join(",",@other_id_strings);
      my $ratio = $significant{$tfs};
      my $count = $tfs_counts{$tfs};
      
      my $d = 311;
      my $m = $tfs_total{$tfs};
      my $n = 20659;
      my $orpv = &orpv($count,$d,$m,$n);
      
      my $row_string = "<TR><TD>$tfs</TD><TD>$in_ids_line</TD><TD>$other_ids_line</TD><TD>$ratio</TD><TD>$orpv</TD></TR>";
      push(@$new_html,$row_string);
      print SUMMARY "SIGNIFICANT\t$tfs\t$id\t$ratio\n";
  }
  
  push(@$new_html,"</TABLE>");
  
  &HTML::show_page($cgi,$new_html);
  exit;
}

sub parse_inputs 
{

 my ($given) =@_;
 my $hash;
 foreach my $g (@$given)
 {
     my $id ="";
     my $ratio = "";

     if ($g =~/(.*?)(\t|\s+)(.*)/)
     {
         $id = $1;
         $ratio = $3;
     }

     $hash{$id} = $ratio;
 }
 
 return \%hash;

}
 
sub gammaln{
    my($x) = @_;
    $x = $x -1;
    if($x){
        my $result = ( $x *log($x) ) - $x;
        #print STDERR "gammaln result: $result\n";
	return $result;
        
    }
    else{ return 0}
}

sub lchoose{
    my($n,$x) = @_;
    return &gammaln($n+1) - &gammaln($x+1) - &gammaln($n-$x+1);
}

sub ldhypg {
    my($x,$d,$m,$n)= @_;
    if ($x>$m || $d-$x > $n-$m || $d>$n || $x>$d) {
        #print STDERR "x,d,m,n:$x,$d,$m,$n\n";
	return(-inf)
    }
    else
    {
        my ($p, $q, $r) = (&lchoose($m,$x), &lchoose($n-$m,$d-$x), &lchoose($n,$d));
        #print STDERR "p, q, r = $p\t$q\t$r\n";
        return($p + $q - $r);
        #return(&lchoose($m,$x) + &lchoose($n-$m,$d-$x) - &lchoose($n,$d));
    }
}

sub orpv{
    #print STDERR "orpv called\n";
    my($x,$d,$m,$n) = @_;
    $pval = 0;
    for ( $y=$x; $y <= $n; $y++) {
        $result = &ldhypg($y,$d,$m,$n);
        $exp_of_result = exp($result);
        if($exp_of_result == 1) {$do_nothing = 1}
	else{$pval= $pval+ $exp_of_result}
    }
    #print STDERR "pval: $pval\n";
    return($pval);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3