[Bio] / FigKernelScripts / test_figfam_placement_into_families.pl Repository:
ViewVC logotype

View of /FigKernelScripts/test_figfam_placement_into_families.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (download) (as text) (annotate)
Thu Nov 11 17:33:27 2010 UTC (9 years ago) by disz
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.9: +2 -2 lines
FigFam 6 digit problem

use FF;
use FFs;

$usage = "usage: test_figfam_placement_into_family Data Dir1 Dir2 Process < families";

(
 ($data = shift @ARGV) &&
 ($dir1 = shift @ARGV) &&
 ($dir2 = shift @ARGV) &&
 ($proc = shift @ARGV)
)
    || die $usage;

my $figfams = new FFs($data);
$| = 1;
while (defined($_ = <STDIN>))
{
    if (($_ =~ /^(FIG\d{3}(\d{3}))$/) && open(PART,"<$data/FIGFAMS/$2/$1/decision.procedure"))
    {
	$fam_id = $1;
	print STDERR "$proc processing $fam_id\n";
	my $dec_proc = <PART>;
	my $part = ($dec_proc && ($dec_proc =~ /^\S+\s+(\d+)/)) ? $1 : undef;
	close(PART);
	my $mod = $part % 1000;
	my @partition_pegs = map { $_ =~ /^>(\S+)/; $1 } `fgrep \">\" $data/Partitions/$mod/$part/fasta`;
	my $figfam = new FF($fam_id,$data);
	if (! $figfam) 
	{ 
	    print STDERR "Family $fam_id could not be accessed\n"; 
	}
	else
	{

	    my $fam_func = $figfam->family_function;
	    my @pegs = @{$figfam->pegs_of};
	    my %in = map { $_ => 1 } @pegs;

	    foreach $peg2 (@partition_pegs)
	    {
		if (! $in{$peg2})
		{
		    if ((! $out_but_same{$peg2}) && (! $out_not_same{$peg2}))
		    {
			my $func2 = $figfams->function_of($peg2,1);
			if (&same($fam_func,$func2))
			{
			    $out_but_same{$peg2} = 1;
			}
			else
			{
			    $out_not_same{$peg2} = 1;
			}
		    }
		}
	    }

	    my $mod = ($fam_id =~ /(\d{3})$/) ? $1 : "";
	    my $dec_procF = &FIG::get_figfams_data() . "/$mod/$fam_id/decision.procedure";
	    if ((! -s $dec_procF) && (-s "$dec_procF.curr"))
	    {
		rename("$dec_procF.curr",$dec_procF);
	    }
	    if (-s $dec_procF)
	    {
		open(REP1,">$dir1/$fam_id") || die "could not open $dir1/$fam_id";
		&check_dec_proc(\%in,\%out_not_same,\*REP1,$figfam,$figfams);
		close(REP1);
		rename($dec_procF,"$dec_procF.curr");
		
		open(REP2,">$dir2/$fam_id") || die "could not open $dir2/$fam_id";
		&check_dec_proc(\%in,\%out_not_same,\*REP2,$figfam,$figfams);
		close(REP2);
		rename("$dec_procF.curr",$dec_procF);
	    }
	    else
	    {
		warn "missng decision procedure for $fam_id\n";
	    }
	}
    }
}

sub check_dec_proc {
    my($in,$out_not_same,$fh,$figfam,$figfams) = @_;

    my $in_ok = 0;
    my $in_bad = 0;
    my $out_ok =0;
    my $out_bad = 0;
    my $bad_dec = 0;

    my $fam_func = $figfam->family_function;
    my $fam_id = $figfam->family_id;

    my($peg,$placed);
    foreach $peg (sort keys(%$in))
    {
	my $pseq = $figfams->seq_of($peg);
	($placed,undef) = $figfam->should_be_member($pseq,undef,undef,$peg);
	if (! $placed)
	{
	    print $fh join("\t",("NOT_IN_SHOULD_BE",$peg,$fam_id,scalar $figfams->function_of($peg),$fam_func)),"\n";
	    $in_bad++;
	}
	else
	{
	    $in_ok++;
	}
    }

    foreach $peg (sort keys(%$out_not_same))
    {
	my $pseq = $figfams->seq_of($peg);
	($placed,undef) = $figfam->should_be_member($pseq,undef,undef,$peg);
	if ($placed)
	{
	    my($got,undef) = $figfams->place_in_family($pseq);
	    if (defined($got) && ($got->family_id eq $fam_id))
	    {
		print $fh join("\t",("OUT_PLACED_IN",$peg,$fam_id,$figfams->function_of($peg),$fam_func)),"\n";
		$out_bad++;
	    }
	    else
	    {
		print $fh join("\t",("PLACE_OK_BAD_DEC",
				     $peg,$fam_id,
				     $figfams->function_of($peg),
				     $fam_func,
				     $got ? $got->family_id : "",
				     $got ? $got->family_function : "")),"\n";
		$bad_dec++;
	    }
	}
	else
	{
	    $out_ok++;
	}
    }
    print $fh join("\t",('STATISTICS',$in_ok,$in_bad,$out_ok,$out_bad,$bad_dec)),"\n";
}

use SameFunc;
sub same {
    my($func1,$func2) = @_;

    my $i;
    $func1 =~ s/^FIG\d+[^:]*:\s*//;
    $func2 =~ s/^FIG\d+[^:]*:\s*//;

    if ($func1 eq $func2) { return 1 }
    if (&loose_same_func($func1,$func2)) { return 1 }
    return 0;
}
#
sub loose_same_func {
    my($f1,$f2) = @_;

    if (&SameFunc::same_func($f1,$f2,'strong')) { return 1 }
    my @s1 = split(/\s*[;\/@]\s*/,$f1);
    my @s2 = split(/\s*[;\/@]\s*/,$f2);
    if ((@s1 == 1) && (@s2 > 1) && &member($s1[0],\@s2))
    {
	return 1;
    }
    elsif ((@s2 == 1) && (@s1 > 1) && &member($s2[0],\@s1))
    {
	return 1;
    }
    else
    {
	return 0;
    }
}

sub member {
    my($x,$yL) = @_;
    my $i;

    for ($i=0; ($i < @$yL) && ($yL->[$i] ne $x); $i++) {}
    return ($i < @$yL);
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3