[Bio] / FigKernelScripts / build_new_figfam_release.pl Repository:
ViewVC logotype

View of /FigKernelScripts/build_new_figfam_release.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Thu Nov 11 17:46:07 2010 UTC (9 years ago) by disz
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.4: +2 -2 lines
FigFam 6 digit problem

use FIG;
my $fig = new FIG;

use strict;

my $usage = "usage: build_new_FIGfam_release OldFigFamsData NewFigFamsData";

my($oldFF,$newFF);

(
 ($oldFF = shift @ARGV) &&
 ($newFF = shift @ARGV)
)   
    || die $usage;

(-s "$newFF/families.2c")      || die "$newFF must contain families.2c";
(-s "$newFF/family.functions") || die "$newFF must contain family.functions";

open(OLD2c,"sort $oldFF/families.2c |")
    || die "could not open $oldFF/families.2c";
open(NEW2c,"sort $newFF/families.2c |")
    || die "could not open $newFF/families.2c";
open(NEWFUNC,"sort $newFF/family.functions |")
    || die "could not open $oldFF/family.functions";

my $lastN;
my $lastO;

my $setO  = &read_old_set(\$lastO,\*OLD2c,1);
my $setN  = &read_new_set(\$lastN,\*NEW2c,\*NEWFUNC,1);

while ($setN || $setO)
{
    if ((! $setN) || ($setO && ($setO->[0] lt $setN->[0])))
    {
	&delete_dir($setO,$newFF);
	$setO = &read_old_set(\$lastO,\*OLD2c);
    }
    elsif ((! $setO) || ($setN && ($setO->[0] gt $setN->[0])))
    {
	print "$setN->[0]\n";
	$setN  = &read_new_set(\$lastN,\*NEW2c,\*NEWFUNC);
    }
    else  # they are equal
    {
	if (&same($setO,$setN))
	{
	    &set_function($setN,$newFF);
	    $setO = &read_old_set(\$lastO,\*OLD2c);
	    $setN  = &read_new_set(\$lastN,\*NEW2c,\*NEWFUNC);
	}
	else
	{
	    &reset_ids_and_clear($setN,$newFF);
	    print "$setN->[0]\n";
	    $setO = &read_old_set(\$lastO,\*OLD2c);
	    $setN  = &read_new_set(\$lastN,\*NEW2c,\*NEWFUNC);
	}
    }
}
close(OLD2c);
close(NEW2c);
close(NEWFUNC);

sub full_path {
    my($FF,$fam) = @_;
    my $mod = substr($fam,-3);
    return "$FF/FIGFAMS/$mod/$fam";
}

sub delete_dir {
    my($setO,$FF) = @_;

    if (! ($FF && ($setO->[0] =~ /^FIG\d+/)))
    {
	print STDERR &Dumper($setO,$FF);
	confess "bad parameters: $setO->[0]\t$FF";
    }

    my $dir = &full_path($FF,$setO->[0]);
    ($dir =~ /FIG\d+$/) || confess "bad directory: $dir";
    if (-d $dir)
    {
	&FIG::run("rm -r $dir");
    }
}

sub read_old_set {
    my($lastP,$fh2c,$first) = @_;

    if ($first) { $$lastP = <$fh2c>; }

    my $set;
    if ($$lastP =~ /^(\S+)/)
    {
	my $fam = $1;
	my @tmp = ();
	while ($$lastP && ($$lastP =~ /^(\S+)\t(\S+)/) && ($1 eq $fam))
	{
	    push(@tmp,$2);
	    $$lastP = <$fh2c>;
	}
	$set = [$fam,[sort @tmp]];
    }
    return $set;
}

sub read_new_set {
    my($lastP,$fh2c,$fh_func,$first) = @_;

    if ($first) { $$lastP = <$fh2c>; }

    my $set;
    if (defined($$lastP) && ($$lastP =~ /^(\S+)/))
    {
	my $fam = $1;
	my @tmp = ();
	while ($$lastP && ($$lastP =~ /^(\S+)\t(\S+)/) && ($1 eq $fam))
	{
	    push(@tmp,$2);
	    $$lastP = <$fh2c>;
	}
	my $funcL = <$fh_func>;
	while (($funcL =~ /^(\S+)\t(\S.*\S)/) && ($1 ne $fam)) { $funcL = <$fh_func> }
	if (($funcL =~ /^(\S+)\t(\S.*\S)/) && ($1 eq $fam))
	{
	    $set = [$fam,[sort @tmp],$2];
	}
	else
	{
	    die "missing Function: $fam $funcL\n";
	}
    }
    return $set;
}
	    
sub set_function {
    my($set,$FF) = @_;

    my $dir = &full_path($FF,$set->[0]);
    open(TMP,">$dir/function") || die "could not open $dir/function";
    print TMP "$set->[2]\n";
    close(TMP);
}

sub same {
    my($setO,$setN) = @_;

    my $pegs1 = $setO->[1];
    my $pegs2 = $setN->[1];
    my $n1 = @$pegs1;
    my $n2 = @$pegs2;
    if ($n1 == $n2)
    {
	my $i;
	for ($i=0; ($i < $n1) && ($pegs1->[$i] eq $pegs2->[$i]); $i++) {}
	if ($i == $n1)
	{
	    return 1;
	}
    }
    return 0;
}

sub reset_ids_and_clear {
    my($setN,$newFF) = @_;

    my $dir = &full_path($newFF,$setN->[0]);
    if (-d $dir)
    {
	my($fam,$pegs,$func) = @$setN;
	if (open(DEC,"<$dir/decision.procedure"))
	{
	    my $x = <DEC>;
	    if     ($x =~ /^ross/) { &reset_ross($pegs,$func,$dir)  }
	    else                   { &reset_blast($pegs,$func,$dir) }
	    close(DEC);
	}
	else                       { &reset_ross($pegs,$func,$dir) }
    }
}

sub reset_ross {
    my($pegs,$func,$dir) = @_;

    &reset_pegs_and_func($pegs,$func,$dir);
    unlink("$dir/bounds");
    unlink("$dir/bounds.sims");
}

sub reset_blast {
    my($pegs,$func,$dir) = @_;

    &reset_pegs_and_func($pegs,$func,$dir);
}

sub reset_pegs_and_func {
    my($pegs,$func,$dir) = @_;

    unlink("$dir/built");
    unlink("$dir/PEGs");
    unlink("$dir/PEGs.fasta");
    unlink("$dir/PEGs.fasta.phr");
    unlink("$dir/PEGs.fasta.pin");
    unlink("$dir/PEGs.fasta.psq");
    unlink("$dir/reps");
    unlink("$dir/function");
    open(PEGS,">$dir/PEGs") || die "could not open $dir/PEGs";
    print PEGS join("\n",@$pegs),"\n";
    close(PEGS);
    &FIG::run("$FIG_Config::ext_bin/formatdb -i $dir/PEGs -p T");
    open(FUNC,">$dir/function") || die "could not open $dir/function";
    print FUNC "$func\n";
    close(FUNC);
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3