[Bio] / FigKernelScripts / get_ON_probes.pl Repository:
ViewVC logotype

View of /FigKernelScripts/get_ON_probes.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Tue May 5 15:38:17 2015 UTC (4 years, 6 months ago) by jdavis
Branch: MAIN
CVS Tags: HEAD
Changes since 1.4: +6 -0 lines
updated joses new functional roles

use strict;
use Data::Dumper;
#use SAPserver;
#my $sapO = new SAPserver;
use SeedUtils;

use ExpressionDir;

my $usage = "usage: get_ON_probes ExprDir outProbesOnFile outPegsOnFile";
#my $usage = "usage: get_ON_probes Genome probe.occ.table peg.probe.table raw_data.tab > Probe-PEG-Func Table";
my($genome,$probe_occF,$probe_pegF,$raw_dataF, $expr_dir, $output_probesF, $output_pegsF);
(
 ($expr_dir     = shift @ARGV) &&
 ($output_probesF = shift @ARGV) &&
 ($output_pegsF = shift @ARGV)
)
    || die $usage;

my $exprO = ExpressionDir->new($expr_dir);

$genome = $exprO->genome_id;
$probe_occF = $exprO->expr_dir . "/probe.occ.table";
$probe_pegF = $exprO->expr_dir . "/peg.probe.table";
$raw_dataF = $exprO->expr_dir . "/rma_normalized.tab";

my %known_pegs = map { ($_ =~ /^(fig\|\d+\.\d+\.[^\.]+\.\d+)/) ? ($1 => 1) : () } `cut -f1 $raw_dataF`;

#my $hashG = $sapO->all_features( -ids => [$genome], -type => 'peg' );
#my @pegs  = grep { $known_pegs{$_} } @{$hashG->{$genome}};
#my $hashF = $sapO->ids_to_functions(-ids => \@pegs);

my @pegs = $exprO->all_features('peg');
my $hashF = $exprO->ids_to_functions(\@pegs);

my %ONfunc = map { chomp; $_ => 1 } <DATA>;
my %ON     = map { $ONfunc{$hashF->{$_}} ? ($_ => $hashF->{$_}) : () } keys(%$hashF);

my $good_probes;

if (open(PO, "<", $probe_occF))
{
    while (<PO>)
    {
	if ($_ =~ /^(\S+)\t0\t/)
	{
	    $good_probes->{$1} = 1;
	}
    }
    close(PO);
}
else
{
    warn "Cannot open $probe_occF: $!. Treating all probes as good.";
}

my @keep;

my %pegs;
if (open(PP, "<", $probe_pegF))
{

    while (<PP>)
    {
	if (($_ =~ /^(\S+)\t(\S+)/) && $ON{$1} && (!defined($good_probes) || $good_probes->{$2}))
	{
	    push(@keep,"$2\t$1\t$ON{$1}");
	    $pegs{$1}++;
	}
    }
    close(PP);
    
    open(PFILE, ">", $output_probesF) or die "cannot write $output_probesF: $!";
    
    foreach my $x (sort { my($a1,$a2) = ($a =~ /^(\d+)_(\d+)/); 
			  my($b1,$b2) = ($b =~ /^(\d+)_(\d+)/); 
			  ($a1 <=> $b1) or ($a2 <=> $b2) } @keep)
    {
	print PFILE "$x\n";
    }
    close(PFILE);
}
else
{
    warn "Cannot open $probe_pegF: $!; using all pegs";
    $pegs{$_}++ for keys %ON;
}

open(PFILE, ">", $output_pegsF) or die "cannot write $output_pegsF: $!";
for my $peg (sort { SeedUtils::by_fig_id($a, $b) } keys %pegs)
{
    if ($known_pegs{$peg})
    {
	print PFILE "$peg\n";
    }
}
close(PFILE);

__DATA__
Alanyl-tRNA synthetase (EC 6.1.1.7)
Arginyl-tRNA synthetase (EC 6.1.1.19)
Asparaginyl-tRNA synthetase (EC 6.1.1.22)
Aspartyl-tRNA synthetase (EC 6.1.1.12)
Cysteinyl-tRNA synthetase (EC 6.1.1.16)
DNA-directed RNA polymerase alpha subunit (EC 2.7.7.6)
DNA-directed RNA polymerase beta subunit (EC 2.7.7.6)
DNA-directed RNA polymerase beta' subunit (EC 2.7.7.6)
DNA-directed RNA polymerase omega subunit (EC 2.7.7.6)
Glutaminyl-tRNA synthetase (EC 6.1.1.18)
Glutamyl-tRNA synthetase (EC 6.1.1.17)
Glycyl-tRNA synthetase alpha chain (EC 6.1.1.14)
Glycyl-tRNA synthetase beta chain (EC 6.1.1.14)
Histidyl-tRNA synthetase (EC 6.1.1.21)
Isoleucyl-tRNA synthetase (EC 6.1.1.5)
LSU ribosomal protein L10p (P0)
LSU ribosomal protein L11p (L12e)
LSU ribosomal protein L13p (L13Ae)
LSU ribosomal protein L14p (L23e)
LSU ribosomal protein L15p (L27Ae)
LSU ribosomal protein L16p (L10e)
LSU ribosomal protein L17p
LSU ribosomal protein L18p (L5e)
LSU ribosomal protein L19p
LSU ribosomal protein L1p (L10Ae)
LSU ribosomal protein L20p
LSU ribosomal protein L21p
LSU ribosomal protein L22p (L17e)
LSU ribosomal protein L23p (L23Ae)
LSU ribosomal protein L24p (L26e)
LSU ribosomal protein L25p
LSU ribosomal protein L27p
LSU ribosomal protein L28p
LSU ribosomal protein L29p (L35e)
LSU ribosomal protein L2p (L8e)
LSU ribosomal protein L30p (L7e)
LSU ribosomal protein L31p
LSU ribosomal protein L32p
LSU ribosomal protein L33p
LSU ribosomal protein L34p
LSU ribosomal protein L35p
LSU ribosomal protein L36p
LSU ribosomal protein L3p (L3e)
LSU ribosomal protein L4p (L1e)
LSU ribosomal protein L5p (L11e)
LSU ribosomal protein L6p (L9e)
LSU ribosomal protein L7/L12 (L23e)
LSU ribosomal protein L9p
Leucyl-tRNA synthetase (EC 6.1.1.4)
Lysyl-tRNA synthetase (class II) (EC 6.1.1.6)
Methionyl-tRNA synthetase (EC 6.1.1.10)
Phenylalanyl-tRNA synthetase alpha chain (EC 6.1.1.20)
Phenylalanyl-tRNA synthetase beta chain (EC 6.1.1.20)
Prolyl-tRNA synthetase (EC 6.1.1.15)
SSU ribosomal protein S10p (S20e)
SSU ribosomal protein S11p (S14e)
SSU ribosomal protein S12p (S23e)
SSU ribosomal protein S13p (S18e)
SSU ribosomal protein S14p (S29e)
SSU ribosomal protein S15p (S13e)
SSU ribosomal protein S16p
SSU ribosomal protein S17p (S11e)
SSU ribosomal protein S18p
SSU ribosomal protein S19p (S15e)
SSU ribosomal protein S1p
SSU ribosomal protein S20p
SSU ribosomal protein S21p
SSU ribosomal protein S2p (SAe)
SSU ribosomal protein S3p (S3e)
SSU ribosomal protein S4p (S9e)
SSU ribosomal protein S5p (S2e)
SSU ribosomal protein S6p
SSU ribosomal protein S7p (S5e)
SSU ribosomal protein S8p (S15Ae)
SSU ribosomal protein S9p (S16e)
Seryl-tRNA synthetase (EC 6.1.1.11)
Threonyl-tRNA synthetase (EC 6.1.1.3)
Tryptophanyl-tRNA synthetase (EC 6.1.1.2) ## proteobacterial type
Tyrosyl-tRNA synthetase (EC 6.1.1.1)
Valyl-tRNA synthetase (EC 6.1.1.9)
LSU ribosomal protein L36p @ LSU ribosomal protein L36p, zinc-independent
LSU ribosomal protein L32p @ LSU ribosomal protein L32p, zinc-independent
SSU ribosomal protein S4p (S9e) @ SSU ribosomal protein S4p (S9e), zinc-independent
LSU ribosomal protein L36p @ LSU ribosomal protein L36p, zinc-dependent
LSU ribosomal protein L28p @ LSU ribosomal protein L28p, zinc-independent
Tryptophanyl-tRNA synthetase (EC 6.1.1.2) ## cluster 1

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3