[Bio] / FigKernelScripts / apply_peg_changes_to_subsystem.pl Repository:
ViewVC logotype

View of /FigKernelScripts/apply_peg_changes_to_subsystem.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Mon Dec 5 18:56:37 2005 UTC (13 years, 11 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, caBIG-05Apr06-00, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, caBIG-13Feb06-00, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.2: +17 -0 lines
Add license words.

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

#
# Given a set of peg-update transaction log files of the form:
#
# change old-id new-id contig protein-seq
# del id
# add id contig protein-seq
#
# And a list of subsystems, modify the subsystems in place (writing backups
# as usual) to remove the deleted ids and change any occurrences of the old ids to
# the corresponding new id.
#

use strict;
use Data::Dumper;
use FIG;

my $fig = new FIG;

my $usage = "apply_peg_changes_to_subsystem transaction-log-files... [-subsystems subsystem names]";

my(@trans_files, @subs);

my $mode = 'trans';
while (@ARGV)
{
    $_ = shift;
    if (/^-(.*)/)
    {
	if ($1 eq "subsystems")
	{
	    $mode = 'subsystems';
	}
	else
	{
	    die $usage;
	}
    }
    elsif ($mode eq 'trans')
    {
	push(@trans_files, $_);
    }
    elsif ($mode eq 'subsystems')
    {
	push(@subs, $_);
    }
}

my($trans_table, $genomes_seen) = read_trans_files(\@trans_files);

if (@subs == 0)
{
    @subs = sort $fig->all_subsystems();
}

for my $sub (@subs)
{
    fix_subsystem($sub, $trans_table, $genomes_seen);
}

sub fix_subsystem
{
    my($sname, $actions, $genomes_seen) = @_;

    my $sub = $fig->get_subsystem($sname);

    $sub or die "Cannot find subsystem $sname\n";

    my $modified;

    for my $genome (@$genomes_seen)
    {
	my $row_idx = $sub->get_genome_index($genome);

	next unless $row_idx;
	
	my $row = $sub->get_row($row_idx);

	my $tchange = 0;
	my $tdel = 0;

	for my $cell (@$row)
	{
	    my @ncell = ();
	    my $change = 0;
	    my $del = 0;
	    
	    for my $peg (@$cell)
	    {
		my $action = $actions->{$peg};
		if ($action eq 'del')
		{
		    $del++;
		}
		elsif ($action =~ /^fig\|(\d+\.\d+)/)
		{
		    if ($1 ne $genome)
		    {
			die "Invalid replacement $peg -> $action in $sname";
		    }
		    push(@ncell, $action);
		    $change++;
		}
		else
		{
		    push(@ncell, $peg);
		}
	    }
	    if ($change or $del)
	    {
		@$cell = @ncell;
		$tdel += $del;
		$tchange += $change;
	    }
	}

	if ($tdel or $tchange)
	{

	    if (not $modified)
	    {
		my $cur = $sub->get_curator;
		print "$sname\t$cur\n";
	    }

	    print "  $genome\t$tdel deletes\t$tchange changes\n";
	    $modified++;
	}
    }

    if ($modified)
    {
	$sub->write_subsystem(1);
    }
    
}

sub read_trans_files
{
    my($files) = @_;

    my $actions = {};
    my %genomes_seen;

    for my $file (@$files)
    {
	open(F, "<$file") or die "Cannot open transaction file $file: $!";

	while (<F>)
	{
	    chomp;
	    my($type, @args) = split(/\t/);

	    if ($type eq 'del')
	    {
		$actions->{$args[0]} = 'del';

		$genomes_seen{$fig->genome_of($args[0])}++;
	    }
	    elsif ($type eq 'change')
	    {
		$actions->{$args[0]} = $args[1];
		$genomes_seen{$fig->genome_of($args[0])}++;
	    }
	    elsif ($type eq 'add')
	    {
	    }
	    else
	    {
		die "Invalid action at $file line $.";
	    }
	}
    }
    return $actions, [sort keys %genomes_seen];
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3