[Bio] / FigKernelScripts / get_rid_of_small_or_too_many_per_FF.pl Repository:
ViewVC logotype

View of /FigKernelScripts/get_rid_of_small_or_too_many_per_FF.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Tue Apr 16 12:48:13 2013 UTC (6 years, 7 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.1: +12 -1 lines
allow short prots in subsys

use strict;
use FIG;
my $fig = FIG->new;

##  This takes a 2-col (+) families.2c file on STDIN and writes an updated
##  version to STDOUT.  Each input line is "family-id <TAB> member [...]";
##  consecutive lines that share a family-id are treated as one family.
##  A family is printed only if it has more than one member both before and
##  after filtering by ok_fam(); every other family is dropped.
##
##  Reading stops at end of input or at the first line that does not begin
##  with a family-id (for example a blank line).
##
my $last = <STDIN>;
while (defined($last) && ($last =~ /^(\S+)/))
{
    my $fam = $1;
    my @set;

    ## Collect the run of consecutive lines belonging to $fam.  $last always
    ## holds the first line that has not yet been assigned to a family.
    while (defined($last) && ($last =~ /^(\S+)\t\S+/) && ($1 eq $fam))
    {
        push(@set,$last);
        $last = <STDIN>;
    }

    if (! @set)
    {
        ## The current line starts with a family-id but has no tab-separated
        ## second column, so the loop above consumed nothing.  Skip the line
        ## explicitly; otherwise the outer loop would test the same line
        ## forever.
        warn "skipping malformed line (expected family<TAB>member): $last";
        $last = <STDIN>;
        next;
    }

    my $ok;
    if ((@set > 1) && ($ok = ok_fam($fig,\@set)) && (@$ok > 1))
    {
        print @$ok;
    }
}

##  ok_fam($fig, $set [, $max_per_genome [, $short_cutoff]])
##
##  Filter the member lines of a single family.
##
##    $fig             object providing translation_length($peg) and
##                     peg_to_subsystems($peg)
##    $set             reference to a list of input lines of the form
##                     "family <TAB> fig|<genome>.peg.<n> ..."
##    $max_per_genome  optional; largest number of members one genome may
##                     contribute before the whole family is rejected
##                     (default 100)
##    $short_cutoff    optional; members whose translation length is greater
##                     than this value are always kept (default 50)
##
##  Returns a reference to the list of lines that survive, in input order.
##  A member with translation length <= $short_cutoff survives only if
##  peg_to_subsystems() returns at least one entry for it.  Lines whose
##  second column does not look like fig|<digits>.<digits>.peg.<digits> are
##  dropped.  If any single genome contributes more than $max_per_genome
##  members, an empty list reference is returned for the whole family.
##
sub ok_fam {
    my($fig,$set,$max_per_genome,$short_cutoff) = @_;

    ## The defaults reproduce the thresholds that used to be hard-coded here.
    $max_per_genome = 100 unless defined($max_per_genome);
    $short_cutoff   = 50  unless defined($short_cutoff);

    my %genomes;    ## genome key => number of members seen so far
    my $ok = [];
    foreach my $line (@$set)
    {
        next unless ($line =~ /^\S+\t(fig\|(\d+\.\d+)\.peg\.\d+)/);
        my($peg,$genome) = ($1,$2);

        ## One over-represented genome rejects the entire family, not just
        ## the surplus members.
        if (++$genomes{$genome} > $max_per_genome)
        {
            return [];
        }

        my $ln = $fig->translation_length($peg);
        if (defined($ln) && ($ln > $short_cutoff))
        {
            push(@$ok,$line);
            next;
        }

        ## Short (or unknown-length) members are kept only when they occur in
        ## at least one subsystem.  The call is made in list context, as the
        ## original code did.
        my @ss = $fig->peg_to_subsystems($peg);
        if (@ss > 0)
        {
            push(@$ok,$line);
        }
    }
    return $ok;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3