[Bio] / FigKernelScripts / get_ss_connections.pl Repository:
ViewVC logotype

View of /FigKernelScripts/get_ss_connections.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12 - (download) (as text) (annotate)
Fri Jul 24 02:13:15 2009 UTC (10 years, 4 months ago) by redwards
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.11: +3 -0 lines
only call if not in a ss

#__perl__
#

=pod

=head1 get_ss_connections.pl

For the metagenomes we connect genomes to subsystems by similarity to proteins in subsystems. These are then stored as attributes that we can recall for generating heat maps

This script generates the counts of pegs from any genome that are in a subsystem, writes them to a temporary file, and then generates the percent. In addition, we now add these metagenomes into the subsystem so that they are displayed using thenew seed viewer.

=cut

use FIG;
use FIGV;
use strict;

my($orgdir, $do_figv, $bindings, $variants);

my $verbose;

my ($genome, $maxN, $maxP, $xipe, $pof, $ppof)=(undef, 50, 10, undef, undef, undef);
while (@ARGV)
{
    my $t=shift;
    if ($t eq "-orgdir") { $orgdir = shift; }
    if ($t eq '-figv') { $do_figv = 1; $bindings = shift; $variants = shift }
    if ($t eq "-g") {$genome=shift}
    if ($t eq "-n") {$maxN=shift}
    if ($t eq "-p") {$maxP=shift}
    if ($t eq "-x") {$xipe=shift}
    if ($t eq "-o") {$pof=shift}
    if ($t eq "-h") {$ppof=shift}
    if ($t eq '-v') { $verbose = 1;}
}

my $fig;

if ($orgdir)
{
    $fig = new FIGV($orgdir);
}
else
{
    $fig = new FIG;
}

my $usage=<<EOF;
$0
-figv bindings-file variantsf-file (generate FIGV-style output)
-orgdir dir (configure a FIGV instead of a FIG, using dir for the org dir)
-g genome id
-n maxN (default=$maxN)
-p maxP (default=$maxP)
-x xipe output file (optional)
-o peg output file (optional) outputs a list of pegs/ss
-h file to write hits to proteins from other genomes (outputs peg/sim/ss)

EOF

die $usage unless ($genome);

if ($do_figv)
{
    open(BIND, ">$bindings") or die "Cannot write $bindings: $!";
    open(VAR, ">$variants") or die "Cannot write $variants: $!";
}

my $subsys; my $pegsinss; my $phits;
my $subsysrole; # subsystems and roles
foreach my $peg ($fig->pegs_of($genome)) 
{
    my @sims = $fig->sims($peg, $maxN, $maxP, 'fig', 'figx');
    my $isinss; # this peg is in a subsystem, so call it done and move on.
    foreach my $sims (@sims)
    {
        foreach my $ss ($fig->subsystems_for_peg($sims->[1]))
        {
	    $isinss = 1;
	    my($ssname, $role) = @$ss;
	    $subsys->{$ssname}->{$peg} = 1;
	    $subsysrole->{$ssname}->{$role}->{$peg} = 1;
	    
            $pegsinss->{$peg}=1;
            $phits->{$peg}->{$sims->[1]}->{$ssname}=1;
        }
	last if ($isinss);
    }
}   


# add the genome to the subsystems tables
foreach my $ss (keys %$subsysrole)
{
	my $sub=$fig->get_subsystem($ss);
	if (!$sub)
	{
	    warn "Cannot get_subsystem for $ss\n";
	    next;
	}
	my $variant;
	if (!$do_figv)
	{
	    $sub->add_genome($genome); # add the genome if it is not present. Does nothing if it is present
	}
	foreach my $role (keys %{$subsysrole->{$ss}})
	{
		# first see what pegs we have for this genome
		my %allroles=map {$_=>1} $sub->get_pegs_from_cell($genome, $role);
		# add the new pegs
		map {$allroles{$_}=1} keys %{$subsysrole->{$ss}->{$role}};
		# now set all the pegs
		if ($do_figv)
		{
		    for my $peg (keys %{$subsysrole->{$ss}->{$role}})
		    {
			print BIND join("\t", $ss, $role, $peg), "\n";
		    }
		}
		else
		{
		    $sub->set_pegs_in_cell($genome, $role, [keys %allroles]);
		}
		$variant="env genome"; # we should really do something clever here to figure out what the variant code is most likely to be
	}
	if ($variant)
	{
		my $idx = $sub->get_genome_index($genome);
		if ($do_figv)
		{
		    print VAR join("\t", $ss, $variant), "\n";
		}
		else
		{
		    $sub->set_variant_code($idx,$variant);
		    $sub->write_subsystem;
		}
	}
}
		


my $npegs=scalar(keys %$pegsinss); # the number of pegs that are connected to subsystems
foreach my $ss (keys %$subsys)
{
    print join("\t", $genome, "ss_connections", "$ss:".scalar(keys %{$subsys->{$ss}})/$npegs), "\n";
}

if ($xipe)
{
    open(OUT, ">$xipe") ||  die "Can't write to $xipe";
    print OUT map {"$_\t".scalar(keys %{$subsys->{$_}})."\n"} keys %$subsys;
    close OUT;
}


if ($pof)
{
    open(OUT, ">$pof") || die "Can't write to $pof";
    foreach my $ss (keys %$subsys)
    {
        foreach my $peg (keys %{$subsys->{$ss}}) {print OUT "$peg\t$ss\n"}
    }
    close OUT;
}

if ($ppof)
{
    open(OUT, ">$ppof") || die "Can't write to $ppof";
    foreach my $peg (keys %$phits)
    {
        foreach my $sim (keys %{$phits->{$peg}})
        {
            foreach my $k (keys %{$phits->{$peg}->{$sim}})
            {
                print OUT join("\t", $peg, $sim, $k), "\n";
            }
        }
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3