[Bio] / FigKernelScripts / compute_changed_ids_for_nrs.pl Repository:
ViewVC logotype

View of /FigKernelScripts/compute_changed_ids_for_nrs.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Wed Jul 8 20:23:01 2009 UTC (10 years, 4 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.4: +40 -24 lines
misc fixes

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


#
# Main program.
#
# Compares the IDs found on the ">ID ..." header lines of OldNR with those
# of NewNR and writes, one ID per line:
#
#   DeletedIDs - IDs present in OldNR but not in NewNR
#   AddedIDs   - IDs present in NewNR but not in OldNR
#
# ChangedIDs is created (truncated) but nothing is written to it, and
# OldSyn is accepted on the command line but never read: the synonym-based
# "changed" detection that used load_equiv() is disabled.
#
my $usage = "usage: compute_changed_ids_for_nrs OldNR OldSyn NewNR AddedIDs ChangedIDs DeletedIDs";

# Count the arguments instead of testing their truth so that a file that
# happens to be named "0" is accepted; empty names are still rejected.
(@ARGV >= 6) || die $usage;
my($old_nr, $old_syn, $new_nr, $added, $changed, $deleted) = splice(@ARGV, 0, 6);
die $usage if grep { !defined($_) || $_ eq '' } ($old_nr, $old_syn, $new_nr, $added, $changed, $deleted);

# Open the outputs up front (three-arg open, lexical handles) and report
# the real reason on failure rather than the usage string.
open(my $added_fh,   ">", $added)   or die "cannot open added ids file $added: $!";
open(my $changed_fh, ">", $changed) or die "cannot open changed ids file $changed: $!";
open(my $deleted_fh, ">", $deleted) or die "cannot open deleted ids file $deleted: $!";

my $new = load_ids($new_nr);

open(my $old_fh, "<", $old_nr) or die "cannot open old nr $old_nr: $!";

# IDs of the new NR that also occur in the old NR are marked rather than
# removed from the hash, so that an ID repeated in the old NR is not
# misreported as deleted on its second occurrence.
my $SEEN_IN_OLD = 2;
my %reported_deleted;

while (defined(my $line = <$old_fh>))
{
    next unless $line =~ /^>(\S+)/;
    my $key = $1;

    if ($new->{$key})
    {
	$new->{$key} = $SEEN_IN_OLD;
    }
    elsif (! $reported_deleted{$key}++)
    {
	#
	# The way we build NR these days we will not have any IDs in the
	# NR that would be on the RHS of a peg.synonyms id list.
	#
	# Hence an ID missing from the new NR is simply reported as
	# deleted (once), without consulting the synonyms.
	#
	print {$deleted_fh} "$key\n";
    }
}
close($old_fh);

# Whatever was never marked was not in the old NR: it is new.
while (my($key, $state) = each %$new)
{
    print {$added_fh} "$key\n" if $state != $SEEN_IN_OLD;
}

# Buffered write errors only surface at close, so check each one.
close($added_fh)   or die "error writing added ids file $added: $!";
close($changed_fh) or die "error writing changed ids file $changed: $!";
close($deleted_fh) or die "error writing deleted ids file $deleted: $!";

#
# load_ids($nr)
#
# Scan the file named $nr and collect the ID from every header line of the
# form ">ID ..." (the ID is the run of non-whitespace characters directly
# after the ">").
#
# Returns a reference to a hash whose keys are those IDs, each mapped to 1.
# Prints "Loading ids" and "done" progress lines on STDOUT.
# Dies, including the system error, if the file cannot be opened.
#
sub load_ids {
    my($nr) = @_;

    print "Loading ids\n";
    # Lexical handle: no global NR filehandle is shared between calls.
    open(my $nr_fh, "<", $nr) or die "could not open $nr: $!";
    my $entries = {};
    while (defined(my $line = <$nr_fh>))
    {
	if ($line =~ /^>(\S+)/)
	{
	    $entries->{$1} = 1;
	}
    }
    close($nr_fh);
    print "done\n";
    return $entries;
}

#
# load_equiv($file)
#
# Parse a synonyms file in which each line has two tab-separated fields:
#
#   main_id,main_len <TAB> alt_id,alt_len;alt_id,alt_len;...
#
# Returns a reference to a hash mapping each main_id to a reference to the
# list of alt_ids whose length equals main_len.  Main IDs with no
# equal-length alternative get no entry.  Lines lacking the second field
# or the main length, and alternatives that are not of the form "id,digits",
# are skipped.
# Dies, including the system error, if the file cannot be opened.
#
sub load_equiv {
    my($file) = @_;

    my $alt_peg = {};
    open(my $syn_fh, "<", $file) or die "could not open $file: $!";
    while (defined(my $line = <$syn_fh>))
    {
	# Strip the newline from the line just read (not from $_).
	chomp $line;
	my($main,$alt) = split(/\t/,$line);
	next unless defined($main) && defined($alt);
	my($main_id,$main_ln) = split(/,/,$main);
	next unless defined($main_ln);

	my @alt;
	foreach my $entry (split(/;/,$alt))
	{
	    # Only read $1/$2 when this entry actually matched, so a
	    # malformed entry cannot reuse captures from an earlier one.
	    if (($entry =~ /^(\S+),(\d+)$/) && ($2 == $main_ln))
	    {
		push(@alt, $1);
	    }
	}
	if (@alt > 0)
	{
	    $alt_peg->{$main_id} = [@alt];
	}
    }
    close($syn_fh);
    return $alt_peg;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3