[Bio] / FigKernelScripts / FFB3_build_ff_oligos.pl Repository:
ViewVC logotype

View of /FigKernelScripts/FFB3_build_ff_oligos.pl

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.1 - (download) (as text) (annotate)
Tue Jan 10 13:18:14 2012 UTC (7 years, 10 months ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2014_0729, mgrast_version_3_2, rast_rel_2014_0912, HEAD
New figfam building code.

# Coordination script that invokes FFB3_get_oligos to create oligos
# and usable_motifs to pull the ones that are unique.
# Creates the figfam index file.
# Creates large intermediate data in work_dir.
# Creates usable motifs for each kmer size kmin-kmax in outputdir.

use strict;
use FIG_Config;
use FIG;
use Data::Dumper;
use IPC::Run 'run';
use Proc::ParallelLoop;

# Command-line synopsis; shown when the number of arguments is not exactly seven.
my $usage = "FFB3_build_ff_oligos.pl parallel ffdir function-index kmin kmax work_dir outputdir";

die "Usage: $usage\n" unless @ARGV == 7;

# Take all seven positional arguments off @ARGV, in the order of the usage line.
my ($parallel, $ff_dir, $func_idx, $kmin, $kmax, $work_dir, $output_dir) = splice(@ARGV, 0, 7);

# The family directory must exist and must contain translation.btree.
die "ffdir $ff_dir does not exist\n" unless -d $ff_dir;

my $trans_btree = "$ff_dir/translation.btree";

die "Required file $trans_btree does not exist\n" unless -f $trans_btree;

# Neither directory is created by this script; both have to be there already.
die "Work directory $work_dir does not exist\n" unless -d $work_dir;
die "Output directory $output_dir does not exist\n" unless -d $output_dir;

# Offset is one since we use ordering constraints for family kmers.

# Stage 1: run FFB3_get_oligos on the family directory and function index,
# handing it the work directory. The list form of system() bypasses the
# shell, so the arguments are passed through unmodified.
my @cmd = ("$FIG_Config::bin/FFB3_get_oligos", $ff_dir, $func_idx, $work_dir);
print "Start @cmd\n";
my $rc = system(@cmd);

# Abort on any failure. system() returns -1 when the program could not be
# started (reason in $!); otherwise the low 7 bits of the status hold the
# terminating signal and the high byte holds the exit code. The -1 case is
# tested first because -1 & 127 is also non-zero.
if ($rc != 0) {
    my $why = $rc == -1 ? "could not be started: $!"
            : $rc & 127 ? "killed by signal " . ($rc & 127)
            :             "exit status " . ($rc >> 8);
    die "Error $rc running @cmd ($why)";
}

# Build the list of work units from the kmers.* subdirectories of the work
# directory (the directory that was handed to FFB3_get_oligos above).
# Each unit is [ set name, set directory, column, ref to the set's files ];
# every set contributes one unit for column 2 and one for column 3, and the
# units are later run in parallel.

my @work;

# Lexical directory handles are used (and closed) so that the handle for the
# work directory and the per-set handles never share or leak a global handle.
opendir(my $work_dh, $work_dir) or die "Cannot open $work_dir: $!";

# Set names are the directory names with the leading "kmers." removed,
# in sorted order. The prefix is stripped from a copy of each name.
my @sets = sort
           map  { (my $name = $_) =~ s/^kmers\.//; $name }
           grep { /^kmers\./ && -d "$work_dir/$_" } readdir($work_dh);
closedir($work_dh);

for my $set (@sets) {
    my $set_dir = "$work_dir/kmers.$set";
    opendir(my $set_dh, $set_dir) or die "Cannot open $set_dir: $!";

    # Inputs are the plain files whose names consist only of digits; they are
    # sorted as strings (not numerically) before the directory is prepended.
    my @files = map { "$set_dir/$_" } sort grep { /^\d+$/ && -f "$set_dir/$_" } readdir($set_dh);
    closedir($set_dh);

    # The unit is still queued, but an empty set is almost certainly a
    # problem upstream, so make it visible.
    warn "Warning: no kmer input files found in $set_dir\n" unless @files;

    # Both units of a set share the same file list reference.
    for my $col (2, 3) {
        push(@work, [$set, $set_dir, $col, \@files]);
    }
}

# Dump the complete work list to stdout for inspection.
print Dumper(\@work);

# Process every work unit via pareach. For each unit the set's input files
# are merged with kmer_tools/sort -m and the merged stream is piped into
# kmer_tools/usable_motifs, which receives the output path
# $output_dir/kmers.<col>.<set>, the range "kmin-kmax", the column and '-'.
# NOTE(review): confirm that the installed Proc::ParallelLoop honours the
# N_Workers option; the CPAN documentation appears to call it Max_Workers.
pareach \@work, sub {
    my($work) = shift;
    my($set, $set_dir, $col, $input_files) = @$work;

    # Batch size is one more than the number of inputs, presumably so that
    # all files are merged in a single pass -- TODO confirm.
    my $n = @$input_files + 1;
    my $out = "$output_dir/kmers.$col.$set";
    my $rc = run(["$FIG_Config::kmer_tools/sort", "-m", "--parallel", 1, "--batch-size", $n, @$input_files], '|',
                 ["$FIG_Config::kmer_tools/usable_motifs", $out, "$kmin-$kmax", $col, '-']);

    # IPC::Run's run() is true only when every command in the pipeline exits
    # with status 0; say which unit failed instead of only printing an empty
    # return value.
    warn "Pipeline failed for set $set column $col (output $out)\n" unless $rc;

    print "Return is $rc\n";
}, { N_Workers => $parallel };

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3