[Bio] / FigKernelScripts / partition_prots.pl Repository:
ViewVC logotype

View of /FigKernelScripts/partition_prots.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Fri Sep 12 01:47:15 2008 UTC (11 years, 6 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_2008_0924, rast_rel_2008_09_30, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, rast_rel_2008_10_09, rast_release_2008_09_29, mgrast_rel_2008_0923, mgrast_rel_2008_0919, mgrast_rel_2008_1110, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, rast_rel_2008_11_24
Changes since 1.1: +10 -11 lines
fixes to FIGV.pm, CompareMR.pm and FIG.pm to support comparison of metabolic reconstructions

use DB_File;
use strict;

use FIG;
my $fig = new FIG;

my $max_sz;
my $usage = "usage: partition_prots MaxPartSz";
(
 ($max_sz = shift @ARGV)
)
    || die $usage;


my %prot_hash;
my %seen_hash;
&build_hashes(\%prot_hash,\%seen_hash);

my @all = sort keys(%prot_hash);
my $n = @all;
print STDERR "$n proteins to process\n";

my $i;
for ($i=0; ($i < @all); $i++)
{
    if (! $seen_hash{$all[$i]})
    {
	print STDERR "processing $all[$i], $i of $#all\n";
	&process($fig,$all[$i],\%seen_hash,\%prot_hash,$max_sz,\$n);
    }
}

sub process {
    my($fig,$prot,$seen,$prot_hash,$max_sz,$nP) = @_;

    my %in;
    $in{$prot} = 1;
    my %closest;
    $closest{$prot} = 0;

    my $sz = 1;
    my $peg = $prot;
    while (($sz < $max_sz) && defined($peg))
    {
	$$nP--;
	delete $closest{$peg};
	my @sims = $fig->sims($peg,10000,1,'raw');
	foreach my $sim (@sims)
	{
	    my $id2 = $sim->id2;
	    if ($prot_hash->{$id2})
	    {
		if (! $in{$id2})
		{
		    $in{$id2} = 1;
		    $sz++;
		    $closest{$id2} = $sim->bsc;
		}
		elsif (defined($closest{$id2}) && ($closest{$id2} < $sim->bsc))
		{
		    $closest{$id2} = $sim->bsc;
		}
	    }
	}
	$seen->{$peg} = 1;
	print "IN\t$prot\t$peg\n";
	$peg = &the_closest(\%closest,$seen);
    }
    print STDERR "$$nP left\n";
    foreach $_ (sort keys(%in))
    {
	print join("\t",('SET',$prot,$_)),"\n";
    }
}

sub the_closest {
    my($closest,$seen) = @_;

    my($peg,$x,$peg1,$x1);
    while (($peg1,$x1) = each %$closest)
    {
	if ((! $seen->{$peg}) && ((! defined($peg)) || ($x1 > $x)))
	{
	    $peg = $peg1;
	    $x = $x1;
	}
    }
    return $peg;
}

sub build_hashes {
    my($prot_hash,$seen_hash) = @_;

    my($prot_hash_tie,$seen_hash_tie);
#    $prot_hash_tie = tie %$prot_hash, 'DB_File','prot_hash.db',O_RDWR,0666,$DB_BTREE;
#    $seen_hash_tie = tie %$seen_hash, 'DB_File','seen_hash.db',O_RDWR,0666,$DB_BTREE;
#    return;

    $prot_hash_tie = tie %$prot_hash, 'DB_File','prot_hash.db',O_CREAT,0666,$DB_BTREE;
    $prot_hash_tie || die "tie failed";

    $seen_hash_tie = tie %$seen_hash, 'DB_File','seen_hash.db',O_CREAT,0666,$DB_BTREE;
    $seen_hash_tie || die "tie failed";

    open(SYMS,"<$FIG_Config::global/peg.synonyms")
	|| die "could not open peg.synonyms";
    while (defined($_ = <SYMS>))
    {
	chop;
	my($head,$rest) = split(/\t/,$_);
	if ($rest =~ /fig\|/)
	{
	    my @fig = map { ($_ =~ /(fig\|\d+\.\d+\.peg\.\d+)/) ? $1 : () } split(/;/,$rest);
	    @fig = grep { $fig->is_complete(&FIG::genome_of($_)) } @fig;
	    if (@fig > 0)
	    {
		$head =~ s/,\d+$//;
		$prot_hash{$head} = 1;
		foreach my $peg (@fig)
		{
		    $seen_hash{$peg} = 1;
		}
	    }
	}
    }
    print STDERR "completed pass through peg.synonyms\n";

    foreach my $genome ($fig->genomes('complete'))
    {
	foreach my $peg ($fig->all_features($genome,'peg'))
	{
	    if (! $seen_hash{$peg})
	    {
		$prot_hash{$peg} = 1;
	    }
	}
    }
    print STDERR "marked remaining pegs\n";
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3