[Bio] / FigWebServices / ma_to_tf.cgi Repository:
ViewVC logotype

View of /FigWebServices/ma_to_tf.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (annotate)
Fri Jan 19 18:00:57 2007 UTC (13 years, 2 months ago) by mkubal
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.3: +26 -7 lines
upgrade

# -*- perl -*-

=pod

=head1 ma_to_tf.cgi

Find transcription factors for Affymetrix spot ids

=cut

use FIG;
use FIG_Config;
use HTML;
use CGI;
my $cgi=new CGI;
use LWP::Simple qw(!head); # see the caveat in perldoc LWP about importing two head methods.

# Load the transcription factor names (one per line) that populate the
# 'tf_search_term' popup menu built in show_initial.
my @list_of_tfs;
my $tf_list_file = "/home/mkubal/public_html/all_tfs.txt";
if (open(my $tf_list_fh, '<', $tf_list_file)) {
    while (my $line = <$tf_list_fh>) {
        chomp($line);
        push(@list_of_tfs, $line);
    }
    close($tf_list_fh);
}
else {
    # Best effort: the page still works with an empty menu, so report the
    # problem in the server log instead of aborting the request.
    warn "ma_to_tf.cgi: cannot read $tf_list_file: $!\n";
}
# An undef first entry is prepended -- presumably so the menu starts on a
# blank choice and no factor is selected by default.
unshift(@list_of_tfs, undef);


# ---------------------------------------------------------------------------
# Request dispatch.
#
# Every submit button on the form is named 'request'; the label of the button
# that was pressed selects the action.  Each handler renders its own page and
# exits, so reaching the end of this section means nothing was handled and the
# blank search form is shown instead.
# ---------------------------------------------------------------------------
my $fig = FIG->new;
my $html = [];

unshift(@$html, "<TITLE>Find Transcription Factors for Expressed Genes</TITLE>\n");

my $request = $cgi->param('request');
$request = '' unless defined $request;

# Microarray input (spot id / ratio pairs) is parsed for every request; an
# uploaded file, when present, takes precedence over the textarea.
my $id_to_ratio;
if ($request)
{
  my $input = $cgi->param('proteins');
  $input = '' unless defined $input;
  # Browsers submit textarea content with CRLF line ends; strip the CR too so
  # it does not end up as part of the ratio.
  my @inputs = split(/\r?\n/, $input);

  if (my $fh = $cgi->upload('fileupload'))
  {
     @inputs = <$fh>;
  }

  $id_to_ratio = &parse_inputs(\@inputs);
}

if ($request eq "Find Transcription Factors By Gene")
{
    my $gene = $cgi->param('gene_search_term');
    &find_tfs_by_gene($fig,$cgi,$html,$gene) if $gene;
}

if ($request eq "Find Genes by Transcription Factor")
{
    # The popup menu selection wins; the free-text (comma separated) field is
    # used only when the menu was left on its blank entry.
    my $tf = $cgi->param('tf_search_term') || $cgi->param('tf_search_combos');
    &find_genes_by_tf($fig,$cgi,$html,$tf) if $tf;
}

if ($id_to_ratio && $request eq "Find Transcription Factors")
{
    my $score = $cgi->param('quality_score');
    &find_tfs($fig,$cgi,$html,$id_to_ratio,$score) if $score;
}

if ($request eq "Find Exclusive Factors")
{
    &analyze_combinations($fig,$cgi,$html);
}

if ($id_to_ratio && $request eq "Find Most Frequent Factors")
{
    # NOTE(review): $tag_to_id is never assigned anywhere in this file, and no
    # find_most_frequent sub is defined here; the form does not offer this
    # button either.  Left as-is pending confirmation that it can be removed.
    &find_most_frequent($fig,$cgi,$html,$tag_to_id);
}
else
{
  # Nothing above handled the request (or there was none): show the form.
  &show_initial($fig,$cgi,$html);
  &HTML::show_page($cgi,$html,1);
  exit;
}

# show_initial($fig, $cgi, $html)
#
# Appends the blank search form to the array referenced by $html and returns
# that same reference.  The form has four sections, each with its own submit
# button named 'request':
#   - look up transcription factors for one gene,
#   - look up genes for one factor (menu) or several (comma separated text),
#   - look up factors for pasted/uploaded microarray data,
#   - analyze the factors found by the previous microarray search.
# $fig is accepted for symmetry with the other handlers but is not used.
# The factor menu is filled from the file-level @list_of_tfs.
sub show_initial {
 my ($fig,$cgi,$html)=@_;
 my @q_scores = (1,2,3,4,5,6);
 # Fold-change thresholds offered in the menu (a duplicated "2" entry in the
 # original list has been removed).
 my @ratio_cutoffs = (0,.5,1.5,2,3,4,5);
# generate a blank page
 push @$html, 
 $cgi->start_multipart_form(),
 "<h3>Search for Transcription Factors for a Gene</h3>",
 $cgi->textarea(-name=>"gene_search_term", -rows=>1, -columns=>20),
 $cgi->submit(-name=>'request', -value=>'Find Transcription Factors By Gene'),
 $cgi->br,
 $cgi->br,
 $cgi->hr,
 "<h3>Search for Genes by Transcription Factor</h3>",
 $cgi->textarea(-name=>"tf_search_combos", -rows=>1, -columns=>20),
 "<p>use commas to separate multiple factors in field above</p>",
 $cgi->popup_menu(-name => 'tf_search_term', -values=>\@list_of_tfs),
 $cgi->submit(-name=>'request', -value=>'Find Genes by Transcription Factor'),
 "<p>or select single factor from menu</p>",
 $cgi->hr,
 "<h3>Find Transcription Factors for Genes in Microarray Data</h3>",  
 "<p>Enter affymetrix spot id, expression ratio pairs separated by a space or a tab</p>\n",
  $cgi->textarea(-name=>"proteins", -rows=>10, -columns=>40), 
 $cgi->br,  
 "<p>or choose a file here:</p>",
  $cgi->filefield(-name=>"fileupload", -size=>50),
 $cgi->br,  
 $cgi->br,  
 $cgi->submit(-name=>'request', -value=>'Find Transcription Factors'),
 $cgi->popup_menu(-name => 'quality_score', -values=>\@q_scores),  
 "<p>set quality threshold 1 (lowest) to 6 (highest)</p>",
 $cgi->hr,  
 "<h3>Analyze Found Transcription Factors</h3>",  
 $cgi->submit(-name=>'request', -value=>'Find Exclusive Factors'), 
 $cgi->popup_menu(-name => 'ratio_cutoff', -values=>\@ratio_cutoffs),
 "<p>set fold change threshold </p>",
 $cgi->br,  
 $cgi->hr,    
 $cgi->reset, $cgi->end_form;
 return $html;
}

# find_tfs($fig, $cgi, $html, $spotid_to_ratio, $score)
#
# Renders a table of transcription factors for the submitted microarray data
# and exits.
#
#   $spotid_to_ratio  hash ref: spot id => expression ratio (user supplied)
#   $score            minimum quality; data rows with quality > $score - 1
#                     are used
#
# Data files read from the fixed data directory:
#   spotid_to_refseq.txt    tab separated; column 0 maps to column 1
#                           (spot id => RefSeq id, per the variable names)
#   refseq_to_transfac.txt  tab separated; column 0 = RefSeq id, column 1 =
#                           factor, column 2 = quality, column 5 = comma
#                           separated interacting factors
#
# Files written:
#   tfs_to_ratio.txt              "factor<TAB>ratio" per factor per spot id;
#                                 analyze_combinations reads this file later
#   $FIG_Config::temp/download.txt  tab separated copy of the table rows
#
# NOTE(review): tfs_to_ratio.txt is one fixed path, so concurrent requests
# appear to overwrite each other's results -- confirm whether that matters.
#
# $fig and $html are accepted for symmetry with the other handlers but are
# not used.
sub find_tfs 
{
  my ($fig,$cgi,$html,$spotid_to_ratio,$score)=@_;
  my $new_html = [];
  my $dir = "/home/mkubal/public_html";

  # Minimal HTML escaper for values that originate from the request.
  my $escape_html = sub {
      my ($text) = @_;
      $text = '' unless defined $text;
      $text =~ s/&/&amp;/g;
      $text =~ s/</&lt;/g;
      $text =~ s/>/&gt;/g;
      $text =~ s/"/&quot;/g;
      $text =~ s/'/&#39;/g;
      return $text;
  };

  my @ids = keys(%{$spotid_to_ratio});

  # --- spot id => RefSeq id ------------------------------------------------
  my %spotid_to_refseq;
  my $spot_file = "$dir/spotid_to_refseq.txt";
  if (open(my $spot_fh, '<', $spot_file)) {
      while (my $line = <$spot_fh>) {
          chomp($line);
          my @temp = split("\t",$line);
          next unless defined $temp[0];
          $spotid_to_refseq{$temp[0]} = $temp[1];
      }
      close($spot_fh);
  }
  else {
      warn "find_tfs: cannot read $spot_file: $!\n";
  }

  # --- RefSeq id => factors, factor => interacting factors -----------------
  my %refseq_to_tf;
  my %tf_to_tf;
  my $transfac_file = "$dir/refseq_to_transfac.txt";
  if (open(my $transfac_fh, '<', $transfac_file)) {
      while (my $line = <$transfac_fh>) {
          chomp($line);
          my @temp = split("\t",$line);
          next unless defined $temp[0] && defined $temp[1];
          my $quality = defined $temp[2] ? $temp[2] : 0;
          next unless $quality > $score - 1;
          push(@{ $refseq_to_tf{$temp[0]} }, $temp[1]);
          my $interaction_field = defined $temp[5] ? $temp[5] : '';
          push(@{ $tf_to_tf{$temp[1]} }, split(",",$interaction_field));
      }
      close($transfac_fh);
  }
  else {
      warn "find_tfs: cannot read $transfac_file: $!\n";
  }

  # --- output files (best effort: the page is still rendered on failure) ---
  my $ratio_file = "$dir/tfs_to_ratio.txt";
  my $ratio_fh;
  unless (open($ratio_fh, '>', $ratio_file)) {
      warn "find_tfs: cannot write $ratio_file: $!\n";
      undef $ratio_fh;
  }
  my $download_file = "$FIG_Config::temp/download.txt";
  my $download_fh;
  unless (open($download_fh, '>', $download_file)) {
      warn "find_tfs: cannot write $download_file: $!\n";
      undef $download_fh;
  }

  push(@$new_html,"<HTML><HEAD>
  <TITLE>strep</TITLE>
  <META NAME='generator' CONTENT='YokMap 1.0.1'>
  <META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=iso-8859-1'>
  </HEAD>
  <BODY BGCOLOR='#ffffff'>");
  
  push(@$new_html,"<br><a href='$FIG_Config::temp_url/download.txt'>download table</a><br>"); 
 
  push(@$new_html,"<TABLE border><TR><TH>Expression Ratio</TH><TH>Transcription Factor(s)</TH><TH>Affymetrix Spot ID</TH><TH>RefSeq ID</TH><TH>Interactions</TH><TH>Annotation</TH></TR>");

  my %row_already;
  foreach my $id (@ids)
  {
      my $ratio = $spotid_to_ratio->{$id};
      my $refseq = $spotid_to_refseq{$id};
      next unless defined $refseq;
      my $tfs_ref = $refseq_to_tf{$refseq};
      next unless $tfs_ref;

      # The ratio and spot id come straight from the request; escape them
      # before they are placed in the page.
      my $ratio_html = $escape_html->($ratio);
      my $id_html    = $escape_html->($id);

      # NOTE(review): these sets are reset per spot id, not per factor, so the
      # "Interactions" cell of a later factor also lists the interactions of
      # the factors before it for the same gene.  This matches the original
      # behavior; confirm whether per-factor lists were intended.
      my %link_seen;
      my %int_seen;
      foreach my $tfs (@$tfs_ref){
          foreach my $int (@{ $tf_to_tf{$tfs} || [] }){
              $link_seen{"<a href='$FIG_Config::temp_url/$int.html'>$int</a>"} = 1;
              $int_seen{$int} = 1;
          }
          # Sorted so the page and the download are reproducible.
          my $interactions = join(" ",sort keys(%link_seen));
          my $int_string = join(" ",sort keys(%int_seen));
          my $annotation = "none yet";
          my $tfs_link =  "<a href='$FIG_Config::temp_url/$tfs.html'>$tfs</a>";
          my $refseq_link = "<a href='http://www.genecards.org/cgi-bin/cardsearch.pl?search=".$refseq."&search_type=kwd'>$refseq</a>";
          my $row_string = "<TR><TD>$ratio_html</TD><TD>$tfs_link</TD><TD>$id_html</TD><TD>$refseq_link</TD><TD>$interactions</TD><TD>$annotation</TD></TR>";
          my $download_string = "$ratio\t$tfs\t$id\t$refseq\t$int_string\t$annotation";
          if(!$row_already{$row_string}){
              push(@$new_html,$row_string);
              $row_already{$row_string} = 1;
              print {$download_fh} "$download_string\n" if $download_fh;
          }
      }

      if ($ratio_fh){
          foreach my $tfs (@$tfs_ref){
              print {$ratio_fh} "$tfs\t$ratio\n";
          }
      }
  }

  if ($ratio_fh){
      close($ratio_fh) or warn "find_tfs: error closing $ratio_file: $!\n";
  }
  if ($download_fh){
      close($download_fh) or warn "find_tfs: error closing $download_file: $!\n";
  }

  push(@$new_html,"</TABLE>");

  &HTML::show_page($cgi,$new_html);
  exit;
}

# find_tfs_by_gene($fig, $cgi, $html, $gene_search_term)
#
# Renders a table of the transcription factors recorded for one gene id and
# exits.  $gene_search_term is matched exactly (after trimming surrounding
# whitespace) against column 0 of refseq_to_transfac.txt.
#
# refseq_to_transfac.txt is tab separated: column 0 = RefSeq id, column 1 =
# factor, column 2 = quality, column 5 = comma separated interacting factors.
# The quality threshold is fixed at 0 here, i.e. rows with quality > -1 count.
#
# $fig and $html are accepted for symmetry with the other handlers but are
# not used.
sub find_tfs_by_gene 
{
  my ($fig,$cgi,$html,$gene_search_term)=@_;
  my $refseq = defined $gene_search_term ? $gene_search_term : '';
  # The term comes from a textarea, so it may carry a trailing newline or CR.
  $refseq =~ s/^\s+//;
  $refseq =~ s/\s+$//;
  my $new_html = [];
  my $dir = "/home/mkubal/public_html";
  my $score = 0;

  my @gene_tfs;    # factors listed for the requested gene, in file order
  my %tf_to_tf;    # factor => [ interacting factors ], over the whole file
  my $transfac_file = "$dir/refseq_to_transfac.txt";
  if (open(my $transfac_fh, '<', $transfac_file)) {
      while (my $line = <$transfac_fh>) {
          chomp($line);
          my @temp = split("\t",$line);
          next unless defined $temp[0] && defined $temp[1];
          my $quality = defined $temp[2] ? $temp[2] : 0;
          next unless $quality > $score - 1;
          push(@gene_tfs,$temp[1]) if $temp[0] eq $refseq;
          my $interaction_field = defined $temp[5] ? $temp[5] : '';
          push(@{ $tf_to_tf{$temp[1]} }, split(",",$interaction_field));
      }
      close($transfac_fh);
  }
  else {
      warn "find_tfs_by_gene: cannot read $transfac_file: $!\n";
  }
  
  push(@$new_html,"<HTML><HEAD>
  <TITLE>factors found by gene id</TITLE>
  <META NAME='generator' CONTENT='YokMap 1.0.1'>
  <META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=iso-8859-1'>
  </HEAD>
  <BODY BGCOLOR='#ffffff'>");
  
  push(@$new_html,"<TABLE border><TR><TH>Transcription Factor(s)</TH><TH>Interactions</TH><TH>Regulates</TH><TH>Annotation</TH></TR>");

  my %row_already;
  # NOTE(review): the interaction links accumulate across the gene's factors,
  # so a later row also lists the interactions of earlier factors.  This
  # matches the original behavior; confirm whether per-factor lists were
  # intended.
  my %link_seen;
  foreach my $tfs (@gene_tfs){
      foreach my $int (@{ $tf_to_tf{$tfs} || [] }){
          $link_seen{"<a href='$FIG_Config::temp_url/$int.html'>$int</a>"} = 1;
      }
      # Sorted so repeated requests render the same page.
      my $interactions = join(" ",sort keys(%link_seen));
      my $annotation = "none yet\n";
      my $tfs_link =  "<a href='$FIG_Config::temp_url/$tfs.html'>$tfs</a>";
      my $refseq_link = "<a href='http://www.genecards.org/cgi-bin/cardsearch.pl?search=".$refseq."&search_type=kwd'>$refseq</a>";
      my $row_string = "<TR><TD>$tfs_link</TD><TD>$interactions</TD><TD>$refseq_link</TD><TD>$annotation</TD></TR>";
      if(!$row_already{$row_string}){
          push(@$new_html,$row_string);
          $row_already{$row_string} = 1;
      }
  }

  push(@$new_html,"</TABLE>");
  &HTML::show_page($cgi,$new_html);
  exit;

}

# find_genes_by_tf($fig, $cgi, $html, $tf_search_term)
#
# Renders a table of the genes regulated by ALL of the requested
# transcription factors and exits.
#
#   $tf_search_term  one factor name, or several separated by commas;
#                    whitespace around each name is ignored and empty names
#                    are dropped
#
# refseq_to_transfac.txt is tab separated: column 0 = RefSeq id, column 1 =
# factor, column 2 = quality.  The quality threshold is fixed at 0 here, i.e.
# rows with quality > -1 count.
#
# $fig and $html are accepted for symmetry with the other handlers but are
# not used.
sub find_genes_by_tf 
{
    my ($fig,$cgi,$html,$tf_search_term)=@_;
    $tf_search_term = '' unless defined $tf_search_term;

    # Trim each name so "A, B" matches the same genes as "A,B".
    my @list_of_tfs;
    foreach my $name (split(",",$tf_search_term)){
        $name =~ s/^\s+//;
        $name =~ s/\s+$//;
        push(@list_of_tfs,$name) if length($name);
    }

    my $new_html = [];
    my $dir = "/home/mkubal/public_html";
    my $score = 0;

    # RefSeq id => { factor => 1 } for constant-time membership tests.
    my %refseq_to_tf;
    my $transfac_file = "$dir/refseq_to_transfac.txt";
    if (open(my $transfac_fh, '<', $transfac_file)) {
        while (my $line = <$transfac_fh>) {
            chomp($line);
            my @temp = split("\t",$line);
            next unless defined $temp[0] && defined $temp[1];
            my $quality = defined $temp[2] ? $temp[2] : 0;
            next unless $quality > $score - 1;
            $refseq_to_tf{$temp[0]}{$temp[1]} = 1;
        }
        close($transfac_fh);
    }
    else {
        warn "find_genes_by_tf: cannot read $transfac_file: $!\n";
    }
    
    push(@$new_html,"<HTML><HEAD>
  <TITLE>genes found by tf</TITLE>
  <META NAME='generator' CONTENT='YokMap 1.0.1'>
  <META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=iso-8859-1'>
  </HEAD>
  <BODY BGCOLOR='#ffffff'>");
    
    push(@$new_html,"<TABLE border><TR><TH>Transcription Factor(s)</TH><TH>Regulates</TH><TH>Annotation</TH></TR>");

    # A gene matches only when every requested factor is recorded for it.
    # With no usable factor names nothing matches (rather than everything).
    my @matching_refseqs;
    if (@list_of_tfs){
        foreach my $rs (sort keys(%refseq_to_tf)){
            my $tfs_of_gene = $refseq_to_tf{$rs};
            my $missing = grep { !$tfs_of_gene->{$_} } @list_of_tfs;
            push(@matching_refseqs,$rs) unless $missing;
        }
    }

    # The first column is the same for every row: links to all requested
    # factors, comma separated.
    my $tfs_link = join(",",
        map { "<a href='$FIG_Config::temp_url/$_.html'>$_</a>" } @list_of_tfs);

    foreach my $refseq (@matching_refseqs){
        my $annotation = "none yet\n";
        my $refseq_link = "<a href='http://www.genecards.org/cgi-bin/cardsearch.pl?search=".$refseq."&search_type=kwd'>$refseq</a>";
        push(@$new_html,"<TR><TD>$tfs_link</TD><TD>$refseq_link</TD><TD>$annotation</TD></TR>");
    }
    
    push(@$new_html,"</TABLE>");
    &HTML::show_page($cgi,$new_html);
    exit;
    
}

# analyze_combinations($fig, $cgi, $html)
#
# Renders two tables -- factors seen only with non-negative ratios ("UP") and
# factors seen only with negative ratios ("DOWN") -- and exits.
#
# Input is tfs_to_ratio.txt ("factor<TAB>ratio" per line), the file written by
# find_tfs on a previous microarray search.  The optional 'ratio_cutoff'
# request parameter is a fold-change threshold (default 0).
#
# A factor is reported when
#   - all of its ratios have the same sign (a ratio of 0 counts as positive),
#   - and none of its own non-zero ratios is smaller in magnitude than the
#     cutoff.
#
# The original code applied the cutoff test to every line of the file while
# examining each factor, so a single sub-threshold ratio anywhere suppressed
# all factors.  The test is now limited to the factor's own lines.
# NOTE(review): the alternative reading -- ignore sub-threshold observations
# instead of dropping the factor -- is also plausible; confirm the intent.
#
# $fig and $html are accepted for symmetry with the other handlers but are
# not used.
sub analyze_combinations
{
  my ($fig,$cgi,$html)=@_;
  my $new_html = [];
  my $dir = "/home/mkubal/public_html";
  my $ratio_cutoff = $cgi->param('ratio_cutoff') || 0;

  # One pass over the file, collecting per-factor facts.
  my %seen;           # factor => 1
  my %has_positive;   # factor => 1 when a ratio >= 0 was seen
  my %has_negative;   # factor => 1 when a ratio <  0 was seen
  my %below_cutoff;   # factor => 1 when one of its ratios is under the cutoff
  my $ratio_file = "$dir/tfs_to_ratio.txt";
  if (open(my $ratio_fh, '<', $ratio_file)) {
      while (my $line = <$ratio_fh>) {
          chomp($line);
          my ($tfs,$ratio) = split("\t",$line);
          next unless defined $tfs;
          $ratio = 0 unless defined $ratio && length($ratio);
          $seen{$tfs} = 1;
          if ($ratio < 0) {
              $has_negative{$tfs} = 1;
              $below_cutoff{$tfs} = 1 if $ratio > ($ratio_cutoff * -1);
          }
          else {
              $has_positive{$tfs} = 1;
              $below_cutoff{$tfs} = 1 if $ratio > 0 && $ratio < $ratio_cutoff;
          }
      }
      close($ratio_fh);
  }
  else {
      warn "analyze_combinations: cannot read $ratio_file: $!\n";
  }

  my @positive_exclusives;
  my @negative_exclusives;
  # Sorted so repeated requests render the same page.
  foreach my $tfs (sort keys(%seen)){
      next if $below_cutoff{$tfs};
      next if $has_positive{$tfs} && $has_negative{$tfs};
      if ($has_positive{$tfs}) {
          push(@positive_exclusives,$tfs);
      }
      else {
          push(@negative_exclusives,$tfs);
      }
  }
  
  push(@$new_html,"<HTML><HEAD>
     <TITLE>strep</TITLE>
     <META NAME='generator' CONTENT='YokMap 1.0.1'>
     <META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=iso-8859-1'>
     </HEAD>
     <BODY BGCOLOR='#ffffff'>");
  
  push(@$new_html,"<TABLE border><TR><TH>UP Transcription Factor </TH></TR>");
  foreach my $tfs (@positive_exclusives){
      my $url = $FIG_Config::temp_url."/$tfs".".html";
      push(@$new_html,"<TR><TD><a href='$url'>$tfs</a></TD></TR>");
  }
  push(@$new_html,"</TABLE>");

  push(@$new_html,"<br><br>");
  
  push(@$new_html,"<TABLE border><TR><TH>DOWN Transcription Factor </TH></TR>");
  foreach my $tfs (@negative_exclusives){
      my $url = $FIG_Config::temp_url."/$tfs".".html";
      push(@$new_html,"<TR><TD><a href='$url'>$tfs</a></TD></TR>");
  }
  push(@$new_html,"</TABLE>");
  
  &HTML::show_page($cgi,$new_html);
  exit;
 
}


# parse_inputs($given)
#
# Turns the submitted microarray lines into a lookup table.
#
#   $given   array ref of lines, each "id<whitespace>ratio"; lines may still
#            carry their line ending
#   returns  hash ref: id => ratio.  Everything after the first run of
#            whitespace is the ratio; an id with no ratio maps to ''.  Blank
#            lines are skipped.
#
# The table is a fresh lexical hash on every call.  (The original declared
# "my $hash" but filled and returned the package-global %hash, so entries
# could carry over between calls.)
sub parse_inputs 
{

 my ($given) =@_;
 my %ratio_of;
 foreach my $g (@{ $given || [] })
 {
     next unless defined $g;

     # Work on a copy: the loop variable aliases the caller's array element.
     my $line = $g;
     $line =~ s/^\s+//;
     $line =~ s/\s+$//;    # also removes "\n" and "\r"
     next unless length($line);

     my ($id, $ratio) = split(/\s+/, $line, 2);
     $ratio_of{$id} = defined $ratio ? $ratio : "";
 }
 
 return \%ratio_of;

}
 

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3