[Bio] / FigKernelScripts / ex_get_subsystem_based_estimates.pl Repository:
ViewVC logotype

View of /FigKernelScripts/ex_get_subsystem_based_estimates.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Sat Nov 23 15:10:20 2013 UTC (5 years, 11 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.1: +2 -2 lines
minor changes

use SeedEnv;
use strict;
use Carp;
use Data::Dumper;
use Getopt::Long;

my $usage = "usage: ex_get_subsystem_based_estimates -d Data\n";
my $dataD;
my $rc  = GetOptions('d=s' => \$dataD);

if ((! $rc) || (! -d $dataD))
{ 
    print STDERR $usage; exit ;
}

open(GENOME,"<$dataD/genome") || die "$dataD/genome does not exist";
my $genome = <GENOME>;
chomp $genome;
($genome =~ /^(\d+\.\d+)/) || die "genome: $genome is invalid";
$genome = $1;

my $pccsF = "$dataD/locus.pccs";
my $aliasF = "$dataD/aliases";
open(ALIAS,"<$aliasF") || die "could not open $aliasF";
my %to_peg;
while (defined($_ = <ALIAS>))
{
    chomp;
    my @tmp = split(/\t/,$_);
    my @fig = grep { $_ =~ /^fig\|/ } @tmp;
    if (@fig == 1)
    {
	foreach my $alias (grep { $_ ne $fig[0] } @tmp)
	{
	    $to_peg{$alias} = $fig[0];
	}
    }
}
close(ALIAS);

my $corrH = {};
open(PCC,"<$dataD/locus.pccs") || die "could not open $dataD/locus.pccs";
while (defined($_ = <PCC>))
{
    if (($_ =~ /^(\S+)\t(\S+)\t(\S+)/) &&
	(my $peg1 = $to_peg{$1}) &&
	(my $peg2 = $to_peg{$2}))
    {
	$corrH->{$peg1}->{$peg2} = $corrH->{$peg2}->{$peg1} = $3;
    }
}
close(PCC);

my $sapO    = SAPserver->new;

my $genomeH = $sapO->genomes_to_subsystems( -ids => [$genome] );
my @subs = map { ($_->[1] =~ /^\*?(0|-1)$/) ? () : $_->[0] } @{$genomeH->{$genome}};

my $subH = $sapO->ids_in_subsystems( -subsystems => \@subs,
				     -genome     => $genome);
my @subs = sort  keys %$subH;

my %bad;

foreach my $sub (@subs)
{
    my %pegs;
    my @pegs;

    my $sub_entry = $subH->{$sub};
    @pegs = ();
    foreach my $role (keys(%$sub_entry))
    {
	my $pegs = $sub_entry->{$role};
	foreach $_ (@$pegs) { $pegs{$_} = 1 }
    }
    @pegs = sort { &SeedUtils::by_fig_id($a,$b) } keys(%pegs);
    
    my @sets = grep { @$_ > 1 } split_on_pc(\@pegs,$corrH);

    if (@sets > ((@pegs + 2) / 3))
    {
	$bad{$sub} = 1;
#	print STDERR &Dumper([$sub,\@sets]);
    }
    else
    {
	foreach my $set (@sets)
	{
	    if (@$set > 1)
	    {
		print join(",",@$set),"\tInSubsystem:$sub\n";
	    }
	}
    }
}
foreach $_ (keys(%bad))
{
    print STDERR "bad subsystem\t$_\n";
}

sub split_on_pc {
    my($pegs,$corrH) = @_;

    my @sets = ();
    my %used;
    my $i;
    for ($i=0; ($i < (@$pegs - 1)); $i++)
    {
	if (! $used{$pegs->[$i]})
	{
	    my @poss = ($pegs->[$i]);
	    my $j;
	    for ($j=$i+1; ($j < @$pegs); $j++)
	    {
		if (&corr($pegs->[$j],\@poss,$corrH))
		{
		    push(@poss,$pegs->[$j]);
		    $used{$pegs->[$j]} = 1;
		}
	    }
	    push(@sets,\@poss);
	}
    }
    return @sets;
}

sub corr {
    my($peg1,$cluster,$corrH) = @_;

    my $sum = 0;
    foreach my $peg2 (@$cluster)
    {
	my $v = $corrH->{$peg1}->{$peg2};
	if ((! defined($v)) || ($v < 0.4)) { return 0 }
	$sum += $v;
    }
    return (($sum / @$cluster) >= 0.7);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3