[Bio] / FigKernelScripts / update_PSEED_subsystems.pl Repository:
ViewVC logotype

View of /FigKernelScripts/update_PSEED_subsystems.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Mon Sep 20 20:25:54 2010 UTC (9 years, 2 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.2: +24 -0 lines
add doc

########################################################################
use Time::HiRes 'gettimeofday';
use YAML::Any qw(DumpFile LoadFile);
use Data::Dumper;

=head1 NAME

update_PSEED_subsystems

=head1 SYNOPSIS

  update_PSEED_subsystems [-restart restart-file] reconstruction-directory

=head1 DESCRIPTION

Updates the PSEED subsystems using the reconstruction information in
C<reconstruction-directory>.

Basic steps.

Use &build_needed() to pull all subsystem information from the Sapling. This 
build a data structure that has all the subsystems and roles 
from the annotator SEED. It also loads the metabolic reconstruction
data computed earlier by L<compute_PSEED_reconstructions.pl.

=cut


use FIG;        ### This references the PSEED
my $fig = new FIG;
use Subsystem;
use Getopt::Long;

my $restart_file;
my $dry_run;
my $rc = GetOptions("dry-run" => \$dry_run,
		    "restart=s" => \$restart_file);

($rc && @ARGV == 1) or die "Usage: update_PSEED_subsystems [-restart restart-file] reconstruction-dir\n";

my $recon_dir = shift;
-d $recon_dir or die "Reconstruction directory $recon_dir does not exist\n";
print "go: dryrun=$dry_run restart=$restart_file\n";
use SeedEnv;    ### The servers go (more-or-less) to the ASEED
my $annO = ANNOserver->new();
my $sapO = SAPserver->new();

use strict;

my($vcs,$bindings,$rolesH,$roleAbbrH);

if (defined($restart_file) && -s $restart_file)
{
    print STDERR "Loading restart file...\n";
    my $r = LoadFile($restart_file);
    print STDERR "...done\n";
    ($vcs, $bindings, $rolesH, $roleAbbrH) = @$r;
}
else
{
    ($vcs,$bindings,$rolesH,$roleAbbrH) = &build_needed($sapO, $recon_dir);
    if (defined($restart_file))
    {
	DumpFile($restart_file, [$vcs, $bindings, $rolesH, $roleAbbrH]);
    }
}

my @subsysL = sort keys(%$vcs);
foreach my $sub (@subsysL)
{
    print "\nProcess $sub\n";
    my $roles = [sort keys(%{$rolesH->{$sub}})];
    &sync_sub_and_roles($annO,$fig,$sub,$roles,$roleAbbrH);

    my $altered = 0;
    if (my $vcsH = $vcs->{$sub})
    {
	my $subO           = new Subsystem($sub,$fig);
	my @genomes_in     = sort { $a cmp $b } $subO->get_genomes;
	my @genomes_needed = sort { $a cmp $b } keys(%$vcsH);
	my $i = 0;
	my $n = 0;
	while (($i < @genomes_in) || ($n < @genomes_needed))
	{
	    if (($i < @genomes_in) && (($n == @genomes_needed) || ($genomes_in[$i] lt $genomes_needed[$n])))
	    {
		print "    Remove $genomes_in[$i]\n";
		$subO->remove_genome($genomes_in[$i]) if !$dry_run;
		$i++;
		$altered = 1;
	    }
	    elsif (($n < @genomes_needed) && ($i == @genomes_in) || ($genomes_in[$i] gt $genomes_needed[$n]))
	    {
		my $idx = $subO->add_genome($genomes_needed[$n]) if !$dry_run;
		print "    Add $genomes_needed[$n]\n";
		$subO->set_variant_code($idx,$vcsH->{$genomes_needed[$n]}) if !$dry_run;
		$altered = 1;
		foreach my $role (@$roles)
		{
		    if (my @pegs = keys(%{$bindings->{$genomes_needed[$n]}->{$role}}))
		    {
			print "        $role => @pegs\n";
			$subO->set_pegs_in_cell($genomes_needed[$n],$role,[sort { &FIG::by_fig_id($a,$b) } @pegs]) if !$dry_run;
		    }
                }
		$n++;
	    }
	    elsif (($i < @genomes_in) && ($n < @genomes_needed) && ($genomes_in[$i] eq $genomes_needed[$n]))
	    {
		print "    Update $genomes_in[$i]\n";
		my $vc = $subO->get_variant_code_for_genome($genomes_in[$i]) if !$dry_run;
		if ($vc ne $vcsH->{$genomes_in[$i]})
		{
		    my $idx = $subO->get_genome_index($genomes_in[$i]);

		    print "        change vc from $vc to $vcsH->{$genomes_needed[$n]}\n";
		    $subO->set_variant_code($idx,$vcsH->{$genomes_needed[$n]}) if !$dry_run;
		    $altered = 1;
		}

		foreach my $role (@$roles)
		{
		    my @pegs_in = sort { &SeedUtils::by_fig_id($a,$b) } 
		                  $subO->get_pegs_from_cell($genomes_in[$i],$role);
		    my @pegs_needed = sort { &SeedUtils::by_fig_id($a,$b) } 
		                      keys(%{$bindings->{$genomes_needed[$n]}->{$role}});
		    if (@pegs_in != @pegs_needed)
		    {
			$altered = 1;
			print "        $role => @pegs_needed\n";
			$subO->set_pegs_in_cell($genomes_in[$i],$role,\@pegs_needed) if !$dry_run;
		    }
		    else
		    {
			for (my $i=0; ($i < @pegs_needed) && ($pegs_needed[$i] eq $pegs_in[$i]); $i++) {}
			if ($i < @pegs_needed)
			{
			    $altered = 1;
			    print "        $role => @pegs_needed\n";
			    $subO->set_pegs_in_cell($genomes_needed[$n],$role,\@pegs_needed) if !$dry_run;
			}
		    }
			    
		}		    
		$n++;
		$i++;
	    }
	}

	if ($altered)
	{
	    if (!$dry_run)
	    {
		$subO->write_subsystem;
	    }
	}
    }
}

sub build_needed {
    my($sapO, $recon_dir) = @_;

    my @files = <$recon_dir/*/*>;
    my @genome_files = sort { $a->[0] cmp $b->[0] } map { m,/(\d+\.\d+)$, ; [$1, $_] } @files;

    #
    # Load the subsystem role/abbreviation information from sapling.
    #

    my $roleInfo = $sapO->query(-limit => 999999999,
				-objects => 'Subsystem Includes Role',
				-fields => [qw(Subsystem(id) Includes(abbreviation) Role(id))]);

    my $role_map = {};

    map { $role_map->{$_->[0], $_->[2]} = $_->[1] } @$roleInfo;
    
    my $vcs = {};
    my $bindings = {};
    my $rolesH = {};
    foreach my $genome_file (@genome_files)
    {
	my($genome, $recon_file) = @$genome_file;

	my $meta_recon = LoadFile($recon_file);
	
	foreach my $tuple (@$meta_recon)
	{
	    my($subvc,$role,$peg) = @$tuple;
	    $bindings->{$genome}->{$role}->{$peg} = 1;
	    my($sub,$vc) = ($subvc =~ /^(\S.*\S)\s*:(\S+)$/);
	    defined($sub) || die "sub=$sub vc=$vc subvc=$subvc";
	    $vcs->{$sub}->{$genome} = $vc;
	    $rolesH->{$sub}->{$role} = 1;
	}
    }
    return ($vcs,$bindings,$rolesH, $role_map);
}

sub sync_sub_and_roles {
    my($annO,$fig,$sub,$roles,$roleAbbrH) = @_;

    my $subO = new Subsystem($sub,$fig);
    my $must_fix = 0;
    if ($subO)
    {
	my @roles_in = sort $subO->get_roles;
	my @roles_needed = sort @$roles;
	if (@roles_in != @roles_needed)
	{
	    $must_fix = 1;
	}
	else
	{
	    my $i;
	    for ($i=0; ($i < @roles_in) && ($roles_in[$i] eq $roles_needed[$i]); $i++) {}
	    if ($i < @roles_in) { $must_fix = 1 }
	}
    }
    else
    {
	$must_fix = 1;
	$subO = new Subsystem($sub,$fig,1) if !$dry_run;  ## create new subsystem
    }

    if ($must_fix)
    {
	my $roles_abbrs = [map { [$_, $roleAbbrH->{$sub, $_}] } @$roles];
	print "Set $sub @$roles\n";
	$subO->set_roles($roles_abbrs) if !$dry_run;
	if (!$dry_run)
	{
	    $subO->write_subsystem;
	}
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3