[Bio] / FigKernelScripts / pg_tabulate_reactions.pl Repository:
ViewVC logotype

View of /FigKernelScripts/pg_tabulate_reactions.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Sun Mar 10 20:09:16 2013 UTC (6 years, 8 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.1: +30 -0 lines
compute core reactions

use strict;
use Data::Dumper;
use Getopt::Long;

my $usage = "usage: pg_tabulate_reactions -d Data\n";
my $dataD;
my $rc  = GetOptions('d=s' => \$dataD,);

if ((! $rc) || (! -d $dataD)) { print STDERR $usage; exit }

my $to_func = &load_funcs($dataD);
my %react_to_genomes;
my %genomes = map { chop; ($_ => 1) } `cut -f4 $dataD/genomes.with.job.and.genomeID`;
my $N = keys(%genomes);
open(OUT,">$dataD/reaction.inconsistencies") || die "could not open $dataD/reaction.inconsistencies";

foreach $_ (`cat $dataD/reaction_genome_peg.table`)
{
    chop;
    my($reaction,$genome,$pegs) = split(/\t/,$_);
    foreach my $peg (split(/,/,$pegs))
    {
	$react_to_genomes{$reaction}->{$genome}->{$peg} = 1;
    }
}

my %core;
open(CORE,"| sort > $dataD/core.reactions") || die "could not open $dataD/core.reactions";
foreach my $reaction (keys(%react_to_genomes))
{
    my @tmp = keys(%{$react_to_genomes{$reaction}});
    if (@tmp >= (0.8 * $N))
    {
	print CORE "$reaction\n";
	$core{$reaction} = 1;
    }
}
close(CORE);

open(NONCORE,"| sort > $dataD/non-core.reactions") || die "could not open $dataD/non-core.reactions";
foreach my $reaction (keys(%react_to_genomes))
{
    next if ($core{$reaction});
    my $gH = $react_to_genomes{$reaction};
    foreach my $g (keys(%$gH))
    {
	my $pegH = $gH->{$g};
	foreach my $peg (keys(%$pegH))
	{
	    print NONCORE join("\t",($reaction,$g,$peg)),"\n";
	}
    }
}
close(NONCORE);


foreach my $r (keys(%react_to_genomes))
{
    my $rH = $react_to_genomes{$r};
    my @genomes = keys(%$rH);
    if (@genomes < $N)
    {
	print OUT $r,"\n";
	my %in = map { $_ => 1 } @genomes;
	my @without = sort { $a <=> $b } grep { ! $in{$_} } keys(%genomes);
	print OUT join(",",@without),"\n";
	my %funcs;
	foreach my $g (@genomes)
	{
	    my $pegH = $rH->{$g};
	    foreach my $peg (keys(%$pegH))
	    {
		my $func = $to_func->{$peg};
		push(@{$funcs{$func}},$peg);
	    }
	}
	foreach my $f (keys(%funcs))
	{
	    my $pegs = join(",",@{$funcs{$f}});
	    print OUT "\t$f\t$pegs\n";
	}
	print OUT "//\n";
    }
}

	    
sub load_funcs {
    my($dataD) = @_;

    my $to_func = {};

    foreach my $job (`cut -f 3 $dataD/genomes.with.job`)
    {
	chop $job;
	foreach $_ (`cat /vol/rast-prod/jobs/$job/rp/*/proposed*functions`)
	{
	    if ($_ =~ /^(\S+)\t(\S.*\S)/)
	    {
		$to_func->{$1} = $2;
	    }
	}
    }
    return $to_func;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3