[Bio] / FigKernelScripts / pg_roles_in_some_but_not_X.pl Repository:
ViewVC logotype

View of /FigKernelScripts/pg_roles_in_some_but_not_X.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (as text) (annotate)
Wed Feb 27 22:41:19 2013 UTC (6 years, 9 months ago) by overbeek
Branch: MAIN
Changes since 1.3: +3 -0 lines
fix chomp

use strict;
use Data::Dumper;
use Getopt::Long;
use SeedEnv;

my $usage = "usage: pg_in_some_but_not_X -d Data\n";
my $dataD;
my $rc  = GetOptions('d=s' => \$dataD,);

if ((! $rc) || (! -d $dataD)) { print STDERR $usage; exit }

my %anno = map { chop; ($_ => 1) } `cat $dataD/anno.seed`;

my$peg_to_func                = &load_funcs($dataD);
my($peg_to_seq,$seq_to_pegs)  = &load_seqs($dataD);
my($peg_to_fam,$fam_to_pegs) = &load_fams($dataD);

open(OUT,">","$dataD/role.inconsistencies") || die "could not open $dataD/role.inconsistencies";
my %seen;
foreach my $tuple (map { chop; [split(/\t/,$_)] } `cat $dataD/genomes.with.job.and.genomeID`)
{
    my($name,undef,$rast_job,$rast_genome) = @$tuple;
    if (open(BINDINGS,"<","/vol/rast-prod/jobs/$rast_job/rp/$rast_genome/Subsystems/bindings"))
    {
	&process_genome($peg_to_func,$peg_to_seq,$seq_to_pegs,$peg_to_fam,$fam_to_pegs,\%seen,\*OUT,\*BINDINGS,\%anno);
	close(BINDINGS);
    }
}
close(OUT);

sub process_genome {
    my($peg_to_func,$peg_to_seq,$seq_to_pegs,$peg_to_fam,$fam_to_pegs,$seen,$fhO,$fhB,$anno) = @_;
    while (defined($_ = <$fhB>))
    {
	chop;
	my($subsys,$role,$peg) = split(/\t/,$_);
	my $func1 = $peg_to_func->{$peg};
	if ($func1 && (index($role,$func1) >= 0))
	{
	    if (my $fam = $peg_to_fam->{$peg})
	    {
		my $pegs2 = $fam_to_pegs->{$fam};
		foreach my $peg2 (@$pegs2)
		{
		    my $func2 = $peg_to_func->{$peg2};
		    if ((! $func2) || ($func1 ne $func2))
		    {
			if (! ($seen->{"$func1\t$func2"} || $seen->{"$func2\t$func1"}))
			{
			    $seen->{"$func1\t$func2"} = 1;
			    my @anno_pegs = &get_anno_pegs([$peg,$peg2],$anno,$peg_to_seq,$seq_to_pegs);
			    print OUT join("\t",($peg,$func1,$peg2,$func2,join(",",@anno_pegs),$role,$subsys)),"\n";
			}
		    }
		}
	    }

	    my $seq = $peg_to_seq->{$peg};
	    if ($seq)
	    {
		my $pegs2 = $seq_to_pegs->{$seq};
		if ($pegs2)
		{
		    foreach my $peg2 (@$pegs2)
		    {
			my $func2 = $peg_to_func->{$peg2};
			if ((! $func2) || ($func1 ne $func2))
			{
			    if (! ($seen->{"$func1\t$func2"} || $seen->{"$func2\t$func1"}))
			    {
				$seen->{"$func1\t$func2"} = 1;
				my @anno_pegs = &get_anno_pegs([$peg,$peg2],$anno,$peg_to_seq,$seq_to_pegs);
				print OUT join("\t",($peg,$func1,$peg2,$func2,join(",",@anno_pegs),$role,$subsys)),"\n";
			    }
			}
		    }
		}
	    }
	}
    }
}

sub get_anno_pegs {
    my($initial_pegs,$anno,$peg_to_seq,$seq_to_pegs) = @_;

    my %anno_pegs;
    foreach my $peg (@$initial_pegs)
    {
	my $same_seq = $seq_to_pegs->{$peg_to_seq->{$peg}};
	my @apegs    = grep { $anno->{&SeedUtils::genome_of($_)} } @$same_seq;
	foreach $_ (@apegs) { $anno_pegs{$_} = 1 } 
    }
    return keys(%anno_pegs);
}

sub load_funcs {
    my($dataD) = @_;

    my $to_func = {};

    foreach my $job (`cut -f 3 $dataD/genomes.with.job`)
    {
	chop $job;
	foreach $_ (`cat /vol/rast-prod/jobs/$job/rp/*/proposed*functions`)
	{
	    if ($_ =~ /^(\S+)\t(\S.*\S)/)
	    {
		my $peg = $1;
		my $func = $2;
		$func =~ s/\s+\#.*$//;
		$to_func->{$peg} = $func;
	    }
	}
    }

    foreach my $g (`cat $dataD/anno.seed`)
    {
	chop $g;
	foreach $_ (`cat /vol/mirror-seed/Data.mirror/Organisms/$g/assigned_functions`)
	{
	    if ($_ =~ /^(\S+)\t(\S[^\t]*\S)/)
	    {
		my $peg = $1;
		my $func = $2;
		$func =~ s/\s+\#.*$//;
		$to_func->{$peg} = $func;
	    }
	}
    }
    return $to_func;
}

sub load_seqs {
    my($dataD) = @_;

    my $peg_to_seq = {};
    my $seq_to_pegs = {};

    foreach my $job (`cut -f 3 $dataD/genomes.with.job`)
    {
	chop $job;
	$/ = "\n>";
	foreach $_ (`cat /vol/rast-prod/jobs/$job/rp/*/Features/peg/fasta`)
	{
	    chomp;
	    if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s)
	    {
		chomp;
		my $peg = $1;
		my $seq = $2;
		$seq =~ s/\s//gs;
		$peg_to_seq->{$peg} = $seq;
		push(@{$seq_to_pegs->{$seq}},$peg);
	    }
	}
	$/ = "\n";
    }

    foreach my $g (`cat $dataD/anno.seed`)
    {
	chop $g;
	$/ = "\n>";
	foreach $_ (`cat /vol/mirror-seed/Data.mirror/Organisms/$g/Features/peg/fasta`)
	{
	    if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s)
	    {
		chomp;
		my $peg = $1;
		my $seq = $2;
		$seq =~ s/\s//gs;
		$peg_to_seq->{$peg} = $seq;
		push(@{$seq_to_pegs->{$seq}},$peg);
	    }
	}
	$/ = "\n";
    }
    return ($peg_to_seq,$seq_to_pegs);
}

sub load_fams {
    my($dataD) = @_;

    my $peg_to_fam = {};
    my $fam_to_pegs = {};

    my $fam = 1;
    foreach $_ (`cat $dataD/protein.families`)
    {
	chop;
	my $pegs = [split(/\t/,$_)];
	foreach my $peg (@$pegs)
	{
	    $peg_to_fam->{$peg} = $fam;
	    $fam_to_pegs->{$fam} = $pegs;
	}
	$fam++;
    }
    return ($peg_to_fam,$fam_to_pegs);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3