[Bio] / FigKernelScripts / apply_automatic_subsystem_updates.pl Repository:
ViewVC logotype

View of /FigKernelScripts/apply_automatic_subsystem_updates.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Tue Jun 21 21:37:44 2011 UTC (8 years, 5 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, rast_rel_2014_0729, mgrast_release_3_1_2, mgrast_release_3_1_1, rast_rel_2011_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_10262011, HEAD
Scripts to run rapid_subsystem_inference_batch across all genomes, and to apply
the missing-in-subsystems output of that script to update subsystems.

#
# Given a file of the form
#
# 217.1   Phage_tail_proteins_2   6
# 217.1   RNA_modification_and_chromosome_partitioning_cluster    1
# 217.1   Streptococcal_Hyaluronic_Acid_Capsule   2
# 217.1   Tricarboxylate_transport_cassette       1
#
# determine the subsystems that can have the genomes automatically added.
# Look in the subsystems/bindings files in the genome directories to
# determine the roles populated.
#

use strict;
use FIG;

my $fig = new FIG;

my %ss;
while (<>)
{
    chomp;
    my($genome, $ss, $variant) = split(/\t/);
    push(@{$ss{$ss}}, [$genome, $variant]);
}

for my $ss_name (sort keys %ss)
{
    if (!$fig->ok_to_auto_update_subsys($ss_name))
    {
	# print "No update $ss_name\n";
	next;
    }
    my $glist = $ss{$ss_name};
    my @genomes = map { $_->[0] } @$glist;

    my $ss = Subsystem->new($ss_name, $fig);
    my $curator = $ss->get_curator;
    print "Update $ss_name ($curator): @genomes\n";

    my %has_genomes = map { $_ => 1 } $ss->get_genomes;

    for my $gent (@$glist)
    {
	my($genome, $variant) = @$gent;

	if ($has_genomes{$genome})
	{
	    print "SS already has $genome\n";
	    next;
	}
	next unless $fig->is_prokaryotic($genome);

	$variant = "*$variant" unless $variant =~ /^\*/;
	
	my $bfile = $fig->organism_directory($genome) . "/Subsystems/bindings";
	open(B, "<", $bfile) or die "Cannot open bindings $bfile: $!";
	my %by_role;
	while (<B>)
	{
	    chomp;
	    my($bss, $role, $peg) = split(/\t/);
	    next unless $bss eq $ss_name;
	    push(@{$by_role{$role}}, $peg);
	}
	close(B);
	
	$ss->add_genome($genome);
	my $gi = $ss->get_genome_index($genome);
	
	print "Add $ss_name $genome idx=$gi $variant\n";
	$ss->set_variant_code($gi, $variant);

	for my $role ($ss->get_roles)
	{
	    my $pegs = delete $by_role{$role};
	    next unless $pegs;
	    print "   Role $role has @$pegs\n";
	    $ss->set_pegs_in_cell($genome, $role, $pegs);
	}
	if (%by_role)
	{
	    print "Leftover roles: ", join(" " , keys %by_role), "\n";
	}

    }
    $ss->write_subsystem();
    my $ss = Subsystem->new($ss_name, $fig);
    $ss->db_sync();
}
$fig->mark_subsystems_modified();


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3