[Bio] / FigKernelScripts / pg_core_families.pl Repository:
ViewVC logotype

View of /FigKernelScripts/pg_core_families.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (download) (as text) (annotate)
Fri May 10 17:48:35 2013 UTC (6 years, 6 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.7: +1 -1 lines
minor fix

use RASTserver;
use strict;
use Data::Dumper;
use Getopt::Long;
use SeedEnv;
use PG;

# pg_core_families: split the protein families of a PG data directory into
# "core" and "non-core" sets.
#
#   reads   DataDir/protein.families    one family per line, tab-separated PEG ids
#   writes  DataDir/core.families       families judged to be core
#   writes  DataDir/non.core.families   every other family
#
# A family is core when BOTH hold:
#   * its PEGs map (via SeedUtils::genome_of) to at least 80% of the number of
#     genomes reported by $pg->genomes, and
#   * no genome contributes more than one PEG to the family.
#
# Exits with status 1 (after printing the usage line) when the options do
# not parse or -d does not name an existing directory; dies on any failure
# to open or to flush/close the family files.

my $usage = "usage: pg_core_families  -d DataDir\n";

my $dataD;

my $rc = GetOptions('d=s' => \$dataD);

# Guard against a missing -d before testing the directory, and report the
# failure through a non-zero exit status so callers can detect it.
if ((! $rc) || (! defined($dataD)) || (! -d $dataD))
{
    print STDERR $usage;
    exit(1);
}

my $pg      = PG->new($dataD);
my @genomes = $pg->genomes;
my $N       = @genomes;       # number of genomes in the data directory
my $minN    = 0.8 * $N;       # a core family must reach this many genomes

my $families_file = "$dataD/protein.families";
my $core_file     = "$dataD/core.families";
my $non_core_file = "$dataD/non.core.families";

open(my $in,       "<", $families_file) || die "can not open $families_file: $!";
open(my $out_core, ">", $core_file)     || die "could not open $core_file: $!";
open(my $out_non,  ">", $non_core_file) || die "could not open $non_core_file: $!";

while (defined(my $fam = <$in>))
{
    # chomp (not chop): a final line without a trailing newline must not
    # lose the last character of its last PEG id.
    chomp $fam;
    my @pegs = split(/\t/, $fam);

    my $is_core = 0;

    # Cheap pre-filter: a family with fewer PEGs than $minN cannot possibly
    # span $minN distinct genomes.
    if (@pegs >= $minN)
    {
        my %pegs_in_genome;
        my $dups = 0;
        foreach my $peg (@pegs)
        {
            # presumably genome_of returns the genome id embedded in the
            # PEG id -- defined in SeedUtils, not visible here.
            my $g = SeedUtils::genome_of($peg);
            if ($pegs_in_genome{$g}) { $dups = 1 }
            $pegs_in_genome{$g}++;
        }

        if ((keys(%pegs_in_genome) >= $minN) && (! $dups))
        {
            $is_core = 1;
        }
    }

    # The family line is written back unchanged (newline restored) to
    # exactly one of the two output files.
    print { $is_core ? $out_core : $out_non } $fam, "\n";
}

close($in);

# Buffered write errors (e.g. a full disk) only surface at close time.
close($out_core) || die "could not close $core_file: $!";
close($out_non)  || die "could not close $non_core_file: $!";

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3