[Bio] / FigKernelScripts / make_coreg_conjectures_based_on_subsys.pl Repository:
ViewVC logotype

View of /FigKernelScripts/make_coreg_conjectures_based_on_subsys.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Mon May 17 13:05:46 2010 UTC (9 years, 6 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2010_0928, rast_rel_2010_1206, rast_rel_2010_0827
Changes since 1.2: +2 -2 lines
loosen parms

use SeedEnv;
use strict;
use Carp;
use Data::Dumper;
use Statistics::Descriptive;

my($genome,$rawF);
my $usage = "usage: make_coreg_conjectures_based_on_subsys Genome RawDataTab BadSubsystems";
(
 ($genome    = shift @ARGV) &&
 ($rawF      = shift @ARGV) 
)
    || die $usage;

my %bad_subsystems;
if (@ARGV > 0)
{
    map { $_ =~ /^(\S.*\S)/; $1 => 1 } `cat $ARGV[0]`;
}

my $sapO    = SAPserver->new;
my $corrH = &get_corr($rawF);

my $genomeH = $sapO->genomes_to_subsystems( -ids => [$genome] );
my @subs = map { ($_->[1] =~ /^\*?(0|-1)$/) ? () : $_->[0] } @{$genomeH->{$genome}};
@subs = grep { ! $bad_subsystems{$_} } @subs;
 
my $subH = $sapO->ids_in_subsystems( -subsystems => \@subs,
				     -genome     => $genome);
my %bad;
foreach my $sub (@subs)
{
    my %pegs;
    my $sub_entry = $subH->{$sub};
    my @pegs = ();
    foreach my $role (keys(%$sub_entry))
    {
	my $pegs = $sub_entry->{$role};
	foreach $_ (@$pegs) { $pegs{$_} = 1 }
    }
    @pegs = sort { &SeedUtils::by_fig_id($a,$b) } keys(%pegs);
    
    my @sets = grep { @$_ > 1 } split_on_pc(\@pegs,$corrH);

    if (@sets > ((@pegs + 2) / 3))
    {
	$bad{$sub} = 1;
#	print STDERR &Dumper([$sub,\@sets]);
    }
    else
    {
	foreach my $set (@sets)
	{
	    if (@$set > 1)
	    {
		print join(",",@$set),"\tInSubsystem:$sub\n";
	    }
	}
    }
}
foreach $_ (keys(%bad))
{
    print STDERR "bad subsystem\t$_\n";
}

sub split_on_pc {
    my($pegs,$corrH) = @_;

    my @sets = ();
    my %used;
    my $i;
    for ($i=0; ($i < (@$pegs - 1)); $i++)
    {
	if (! $used{$pegs->[$i]})
	{
	    my @poss = ($pegs->[$i]);
	    my $j;
	    for ($j=$i+1; ($j < @$pegs); $j++)
	    {
		if (&corr($pegs->[$j],\@poss,$corrH))
		{
		    push(@poss,$pegs->[$j]);
		    $used{$pegs->[$j]} = 1;
		}
	    }
	    push(@sets,\@poss);
	}
    }
    return @sets;
}

sub get_corr {
    my($rawFF) = @_;

    my %gene_to_values;
    open(RAW,"<$rawF") || die "could not open $rawF";
    while (<RAW>)
    {
	chomp;
	my ($gene_id, @gxp_values) = split("\t");
	$gene_to_values{$gene_id} = \@gxp_values;
    }
    close(RAW);
    return \%gene_to_values;
}

sub compute_pc
{
    my ($gene_ids, $gxp_hash) = @_;
    my %values = ();

    for (my $i = 0; $i < @$gene_ids-1; $i++)
    {
	my $stat = Statistics::Descriptive::Full->new();
	$stat->add_data(@{$gxp_hash->{$gene_ids->[$i]}});

	for (my $j = $i+1; $j < @$gene_ids; $j++)
	{
	    my ($q, $m, $r, $err) = $stat->least_squares_fit(@{$gxp_hash->{$gene_ids->[$j]}});
	    $values{$gene_ids->[$i]}->{$gene_ids->[$j]} = $r;
	    $values{$gene_ids->[$j]}->{$gene_ids->[$i]} = $r;
	}
    }
    
    return \%values;
}

sub corr {
    my($peg1,$cluster,$corrH) = @_;

    my $hash = &compute_pc([$peg1,@$cluster],$corrH);

#   print STDERR &Dumper($peg1,$cluster);
    
    my $sum = 0;
    foreach my $peg2 (@$cluster)
    {
	my $v = $hash->{$peg1}->{$peg2};
	if ((! defined($v)) || ($v < 0.4)) { return 0 }
	$sum += $v;
    }
    return (($sum / @$cluster) >= 0.7);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3