[Bio] / FigKernelScripts / FFB2_pull_extreme_FF_members.pl Repository:
ViewVC logotype

View of /FigKernelScripts/FFB2_pull_extreme_FF_members.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Tue Apr 23 20:31:41 2013 UTC (6 years, 6 months ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.1: +45 -15 lines
Update to figfam processing

use DB_File;
use strict;
use Data::Dumper;
use gjostat;
use FIG;
use Getopt::Long;

my $fig = new FIG;
my $remove_fragments;

my $usage = "usage: FFB2_pull_extreme_FF_members [--remove-fragments] OldRelease NewRelease";

my $rc = GetOptions("remove-fragments" => \$remove_fragments);

my($oldR,$newR);
(
 $rc && 
 ($oldR    = shift @ARGV) &&
 ($newR    = shift @ARGV)
)
    || die $usage;

my $offset_cutoff = 2;

(-d $newR) || mkdir($newR,0777);

open(OLD,"<","$oldR/families.2c") || die "could not open $oldR/families.2c:$ !";
open(OLDF,"<","$oldR/family.functions") || die "could not open $oldR/family.functions:$ !";
open(NEW,">","$newR/families.2c") || die "could not open $newR/families.2c:$ !";
open(NEWF,">","$newR/family.functions") || die "could not open $newR/family.functions:$ !";
open(REMOVED, ">", "$newR/extreme.fams.removed") || die "Could not open $newR/extreme.fams.removed:$ !";

my %ff_funcs = map { ($_ =~ /^(\S+)\t(\S.*\S)/) ? ($1 => $2) : () } <OLDF>;

my %peg_len;
if (!tie %peg_len, 'DB_File', "$oldR/length.btree", O_RDONLY, 0, $DB_BTREE)
{
    die "Cannot tie $oldR/length.btree: $!";
}
    

my $last = <OLD>;
while ($last && ($last =~ /^(\S+)/))
{
    my $curr = $1;
    my @set;
    while ($last && ($last =~ /^(\S+)\t(fig\|(\d+\.\d+)\.peg\.\d+)/) && ($1 eq $curr))
    {
	my $peg = $2;

	my $is_frag = 0;
	if ($remove_fragments)
	{
	    my $loc = $fig->feature_location($peg);
	    my $seq = $fig->dna_seq($3,$loc);
	    my $last_codon = substr($seq,-3);
	    my $first_codon = substr($seq,0,3);
	    if (!(($last_codon =~ /taa|tag|tga/i) && ($first_codon =~ /[acgt]tg/i)))
	    {
		$is_frag = 1;
	    }
	}

	if ($is_frag)
	{
#	    print STDERR "bad sequence=$seq\n";
	}
	else 
	{ 
	    push(@set,$peg);
	}
	$last = <OLD>;
    }
    if (@set > 2)
    {
	my @lens = map { my $peg = $_; 
			 my $ln = $peg_len{$peg};
			 $ln ? [$peg,$ln] : () } @set;
	my @just_lens = map { $_->[1] } @lens;
	my($mean,$stdev) = &gjostat::mean_stddev(@just_lens);
	$mean = sprintf("%0.2f",$mean);
	$stdev = sprintf("%0.2f",$stdev);
	print REMOVED "# $curr mean=$mean stddev=$stdev\n";
#	print "mean=$mean stdev=$stdev\n";
	my @tuples = sort { ($b->[2] <=> $a->[2]) }
	             map  { my($peg,$ln) = @$_;
			    my $off = sprintf("%0.2f",($stdev > 1) ? (abs($ln - $mean) / $stdev) : 0);
			    [$peg,$ln,$off] } @lens;
	foreach $_ (@tuples)
	{
	    my($peg,$ln,$off) = @$_;
	    if ($stdev < 50 || $off < $offset_cutoff)
	    {
		print NEW join("\t",($curr,$peg)),"\n";
	    }
	    else
	    {
		print REMOVED join("\t",($curr,@$_)),"\n";
	    }
	}
	print NEWF join("\t",($curr,$ff_funcs{$curr})),"\n";
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3