[Bio] / FigKernelScripts / get_coupling_values.pl Repository:
ViewVC logotype

View of /FigKernelScripts/get_coupling_values.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Tue Sep 14 18:39:54 2010 UTC (9 years, 6 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.2: +0 -4 lines
get rid of extraneous routine

use strict;
use SeedEnv;
use Data::Dumper;
use FIG_Config;

my $usage = "get_coupling_values families.2c  [URL]";
my($fam2c);
(
 ($fam2c       = shift @ARGV)
)
    || die $usage;

my $genome_sets = "$FIG_Config::global/genome.sets";

my $url = shift @ARGV;
my $sapO = new SAPserver(  url => $url );

my $tmp1 = "tmp.ff.$$.1";
my $tmp2 = "tmp.ff.$$.2";
my $tmp3 = "tmp.ff.$$.3";
&SeedUtils::run("cut -f1,2 $fam2c | sort -T . -k 2 > $tmp1");

# my $otuMaps    = $sapO->representative_genomes;
# my $to_otu     = $otuMaps->[0];

open(GS, "<", $genome_sets) or die "Cannot open $genome_sets: $!";
my $to_otu = { map { ($_ =~ /^(\d+)\t(\d+\.\d+)/) ? ($2 => $1) : () } <GS> };
close(GS);

my %otus = map { $to_otu->{$_} => 1 } keys(%$to_otu);

$_ = keys(%otus);
print STDERR "$_ OTUs in analysis\n";

open(TMP,"<",$tmp1) || die "could not open $tmp1";
my $last = <TMP>;
my $n = 0;
while (defined($last) && ($last =~ /^\S+\tfig\|(\d+\.\d+)/) && ($n < 40000))
{
    my $genome = $1;
    my $pegs   = [];
    my $to_ff  = {};
    while ($last && ($last =~ /^(\S+)\t(fig\|(\d+\.\d+)\.peg\.\d+)/) && ($3 eq $genome))
    {
	push(@$pegs,$2);
	$to_ff->{$2} = $1;
	$last = <TMP>;
    }
    my $otu = $to_otu->{$genome};

    if (defined($otu))
    {
	print STDERR "processing $genome\n";
	&process_genome($sapO,$otu,$pegs,$to_ff,$tmp2,$tmp3);
	$n++;
    }
}
close(TMP);

my $co_occH = &summarize($tmp2,4);
my $co_expH = &summarize($tmp3,2);
unlink($tmp1,$tmp2,$tmp3);

my %pairs = map { $_ => 1 } (keys(%$co_occH),keys(%$co_expH));
foreach my $pair (sort keys(%pairs))
{
    my $sc1 = $co_occH->{$pair} ? $co_occH->{$pair} : 0;
    my $sc2 = $co_expH->{$pair} ? $co_expH->{$pair} : 0;
    print join("\t",($pair,$sc2,$sc1)),"\n";
}

sub summarize {
    my($tmp,$min) = @_;
    open(TMP,"sort $tmp |") || die "could not open $tmp";

    my $hash = {};
    $last = <TMP>;
    while (defined($last) && ($last =~ /^(\S+\t\S+)/))
    {
	my $pair  = $1;
	my $score = 0;
	while (defined($last) && ($last =~ /^(\S+\t\S+)/) && ($1 eq $pair))
	{
	    $score++;
	    $last = <TMP>;
	}
	if ($score >= $min)
	{
	    $hash->{$pair} = $score;
	}
    }
    close(TMP);
    return $hash;
}

sub process_genome {
    my($sapO,$otu,$pegs,$to_ff,$tmp2,$tmp3) = @_;

    &process_contiguity($sapO,$otu,$pegs,$to_ff,$tmp2);
    &process_expression($sapO,$otu,$pegs,$to_ff,$tmp3);
}

sub process_expression {
    my($sapO,$otu,$pegs,$to_ff,$tmp2) = @_;
    open(TMP3,"| sort -u >> $tmp3") || die "could not open $tmp3";
    my $fidH = $sapO->coregulated_fids( -ids => $pegs );
    foreach my $peg (@$pegs)
    {
	if (my $fidHp = $fidH->{$peg})
	{
	    my $f1 = $to_ff->{$peg};
	    my @hits = grep { $_->[1] >= 0.6 } map { [$_, $fidHp->{$_}] } keys(%$fidHp);
	    foreach my $hit (@hits)
	    {
		my $f2 = $to_ff->{$hit->[0]};
		if ($f1 && $f2)
		{
		    if ($f1 gt $f2) { ($f1,$f2) = ($f2,$f1) }
		    print TMP3 join("\t",($f1,$f2,$otu)),"\n";
		}
	    }
	}
    }
    close(TMP3);
}


sub process_contiguity {
    my($sapO,$otu,$pegs,$to_ff,$tmp2) = @_;
    open(TMP2,"| sort -u >> $tmp2") || die "could not open $tmp2";
    my $locH = $sapO->fid_locations( -ids => $pegs, -boundaries => 1 );    
    my @pegs_with_locs = sort { ($a->[1] cmp $b->[1]) or ($a->[2] <=> $b->[2]) }
                         map { my $peg = $_; my $loc = $locH->{$peg};
 	                 ($loc =~ /^\d+\.\d+:(\S+)_(\d+)([-+])(\d+)/) ? [$peg,$1,$2,$3,$4] : () } @$pegs;

    my $i;
    for ($i=0; ($i < (@pegs_with_locs - 1)); $i++)
    {
	my $j;
	for ($j=$i+1; ($j < @pegs_with_locs) && (&gap_sz($pegs_with_locs[$i],$pegs_with_locs[$j]) <= 5000); $j++)
	{
	    my $f1 = $to_ff->{$pegs_with_locs[$i]->[0]};
	    my $f2 = $to_ff->{$pegs_with_locs[$j]->[0]};
	    ($f1 && $f2) || die "something is wrong: f1=$f1 f2=$f2";
	    if ($f1 gt $f2) { ($f1,$f2) = ($f2,$f1) }
	    print TMP2 join("\t",($f1,$f2,$otu)),"\n";
	}
    }
    close(TMP2);
}

sub gap_sz {
    my($x,$y) = @_;

    if ($x->[1] ne $y->[1]) { return 1000000 }
    my $min = ($x->[3] eq "+") ? ($x->[2] + $x->[4]) : $x->[2];
    my $max = ($y->[3] eq "+") ? $y->[2] : ($y->[2] - $y->[4]);
    return abs($max - $min);
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3