[Bio] / FigKernelScripts / FFB3_get_gs_oligos.pl Repository:
ViewVC logotype

View of /FigKernelScripts/FFB3_get_gs_oligos.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Tue Jan 10 13:18:14 2012 UTC (7 years, 10 months ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2014_0729, mgrast_version_3_2, rast_rel_2014_0912, HEAD
New figfam building code.

# -*- perl -*-

#
# Create the set of all oligos for the genomes in the
# OTUs. This uses the C++ program make_oligos and writes
# sets of oligos, each individually sorted, which need to
# be merged before use.
#

use strict;
use Data::Dumper;
use Carp;
use IPC::Run qw(start finish reap_nb run);
use Proc::ParallelLoop;
use IPC::Open3;
use POSIX ":sys_wait_h";
use FileHandle;
use FIG;
use FFB3;

my $usage = "usage: FFB3_get_gs_oligos FFdir set.index outdir";

my $min_sz = 7;
my $max_sz = 12;
my($fam2cF,$famFuncF,$famFuncIdx, $out_dir);

@ARGV == 3 or die $usage;

my $oligo_maker = "$FIG_Config::kmer_tools/make_oligos";

my $ff_dir = shift;
my $setIdx = shift;
my $out_dir = shift;

open(SETI, ">", $setIdx) or die "Cannot write $setIdx:$ !";

-d $out_dir || mkdir $out_dir || die "Cannot mkdir $out_dir: $!";


my $fig = new FIG;

my %active_genomes = map { $_ => 1 } $fig->genomes('complete');

if ($ENV{FFB3_TEST_MODE})
{
    %active_genomes = map { $_ => 1 } qw(83333.1  909946.3);
}

open(SETS,"<", "$ff_dir/genome.sets") || die "could not open $ff_dir/genome.sets: $!";

my(%seen,%genomes);

open(MAKER, "|-", $oligo_maker, $min_sz, $max_sz, 0,
     "$ff_dir/translation.btree", $out_dir) || die "Cannot open $oligo_maker: $!";


while (defined(my $x = <SETS>))
{
    if (($x =~ /^(\d+)\t(\d+\.\d+)\t(\S.*\S)/) && $active_genomes{$2})
    {
	$genomes{$2} = $1;
        if (! $seen{$1})
        {   
	    $seen{$1} = 1;
	    print SETI "$1\t$3\n";
        }
    }
}
close(SETS);
close(SETI);

foreach my $genome (sort { $a <=> $b } keys(%genomes))
{
    my($xx) = $genome =~ /(\d)\./;

    my $gs = $fig->genus_species($genome);
    my $set = $genomes{$genome};
    print STDERR "$genome $gs\n";

    my @pegs = $fig->all_features($genome,'peg');

    my $val = "\t$set\n";
			     
    foreach my $peg (@pegs)
    {
	print MAKER $peg, $val;
# 	my $i;
# 	my $seq = $ffb3->get_translation($peg);
# 	my $ln = length($seq);
# 	for($i=0; ($i <= ($ln - $min_sz)); $i++)
# 	{
# 	    my $oligo = uc substr($seq,$i,($i < ($ln-$max_sz)) ? $max_sz : ($ln-$i));
# 	    if ($oligo =~ /^([ACDEFGHIKLMNPQRSTVWY])[ACDEFGHIKLMNPQRSTVWY]*$/)
# 	    {
# 		$ffb3->bundle_write($1, $oligo . "\t" . $set . "\n");
# 	    }
# 	}
    }
}

#$ffb3->bundle_finish($out_dir);

close(MAKER);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3