[Bio] / FigKernelScripts / pg_estimate_alien_pegs.pl Repository:
ViewVC logotype

View of /FigKernelScripts/pg_estimate_alien_pegs.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Wed Apr 10 19:15:29 2013 UTC (6 years, 7 months ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.2: +12 -3 lines
Modifications to pangenome code to move common code to PG.pm.

use strict;
use Data::Dumper;
use Getopt::Long;
use SeedEnv;
use BlastInterface;
use PG;

my $usage = "usage: pg_estmate_alien_pegs -d Data\n";
my $dataD;
my $rc  = GetOptions('d=s' => \$dataD,);

if ((! $rc) || (! -d $dataD)) { print STDERR $usage; exit }

my $pg = new PG($dataD);


my $to_func                = $pg->load_funcs();
my($peg_to_seq,$seq_to_pegs)  = $pg->load_seqs();

my %pegs_in_repeats = map { ($_ =~ /^(\S+)/) ? ($1 => 1) : () } `cat $dataD/pegs.in.repeats`;
my @tuples_for_reps = map { [$_,'',$peg_to_seq->{$_}] } keys(%pegs_in_repeats);
my @tuples_for_all  = map { [$_,'',$peg_to_seq->{$_}] } keys(%$to_func);

use Sim;
my %possible_alien;

my @hits = &BlastInterface::blast(\@tuples_for_all,
				  \@tuples_for_reps,
				  'blastp',
			          { minIden => 0.9, minCovQ => 0.8, threads => 4 });
foreach my $hit (@hits)
{
    $possible_alien{$hit->id1} = 1;
}

open(OUT,">","$dataD/possibly.alien.pegs") || die "could not open $dataD/possibly.alien.pegs";
foreach my $peg (sort { &SeedUtils::by_fig_id($a,$b) } keys(%possible_alien))
{
    my $f = $to_func->{$peg};
    print OUT join("\t",($peg,$f)),"\n";
}
close(OUT);

sub load_funcs {
    my($dataD) = @_;

    my $to_func = {};

    foreach my $job (`cut -f 3 $dataD/genomes.with.job`)
    {
	chop $job;
	foreach $_ (`cat /vol/rast-prod/jobs/$job/rp/*/proposed*functions`)
	{
	    if ($_ =~ /^(\S+)\t(\S.*\S)/)
	    {
		my $peg = $1;
		my $func = $2;
		$func =~ s/\s+\#.*$//;
		$to_func->{$peg} = $func;
	    }
	}
    }

    foreach my $g (`cat $dataD/anno.seed`)
    {
	chop $g;
	foreach $_ (`cat /vol/mirror-seed/Data.mirror/Organisms/$g/assigned_functions`)
	{
	    if ($_ =~ /^(\S+)\t(\S[^\t]*\S)/)
	    {
		my $peg = $1;
		my $func = $2;
		$func =~ s/\s+\#.*$//;
		$to_func->{$peg} = $func;
	    }
	}
    }
    return $to_func;
}

sub load_seqs {
    my($dataD) = @_;

    my $peg_to_seq = {};
    my $seq_to_pegs = {};

    foreach my $job (`cut -f 3 $dataD/genomes.with.job`)
    {
	chop $job;
	$/ = "\n>";
	foreach $_ (`cat /vol/rast-prod/jobs/$job/rp/*/Features/peg/fasta`)
	{
	    chomp; 
	    if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s)
	    {
		my $peg = $1;
		my $seq = $2;
		$seq =~ s/\s//gs;
		$peg_to_seq->{$peg} = $seq;
		push(@{$seq_to_pegs->{$seq}},$peg);
	    }
	}
	$/ = "\n";
    }

    foreach my $g (`cat $dataD/anno.seed`)
    {
	chop $g;
	$/ = "\n>";
	foreach $_ (`cat /vol/mirror-seed/Data.mirror/Organisms/$g/Features/peg/fasta`)
	{
	    chomp;
	    if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s)
	    {
		my $peg = $1;
		my $seq = $2;
		$seq =~ s/\s//gs;
		$peg_to_seq->{$peg} = $seq;
		push(@{$seq_to_pegs->{$seq}},$peg);
	    }
	}
	$/ = "\n";
    }
    return ($peg_to_seq,$seq_to_pegs);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3