[Bio] / MGRASTBackend / mg_chunk_figfam_sims.pl Repository:
ViewVC logotype

View of /MGRASTBackend/mg_chunk_figfam_sims.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Mon Jul 6 19:40:28 2009 UTC (10 years, 3 months ago) by arodri7
Branch: MAIN
CVS Tags: HEAD
first version, figfams pipeline

#!/usr/bin/perl
#
# Chunk a fasta file into pieces suitable for cluster BLAST calculations.
#
# We are provided the NR and peg.synonyms files that should be used for this.
#
# Usage: mg_chunk_figfam_sims.pl [-p blastprog] [-o 'blastopts'] [-n num-seqs] [-N num-chars] [-d ff-data] [-f fasta] -j sims-job-dir figfam-output [figfam-output ...]
#
#
# We write a file task.list into sims-job-dir that contains the list of work units.
#
# The work units will write raw sims into sims-job-dir/sims.raw
#

use strict;
use File::Basename;
use Cwd 'abs_path';

# Usage text shown on every argument error.  It lists exactly the switches
# recognised by the parsing loop below.
my $usage = "$0 [-p blastprog] [-o blastopts] [-N num-chars] [-n num-seqs] [-d ff-data] [-f fasta] -j jobdir figfam-output [figfam-output...]";

# Option defaults; switches without a default stay undefined until given.
my $blastprog = "blastx";
my $flags = "-m 8 -FF";
my $max_seqs;
my $max_chars;
my $jobdir;
my $ff_data;
my $fasta;

# Every recognised switch takes exactly one value.  Map each switch to the
# variable that receives that value.
my %target_for = (
    '-p' => \$blastprog,
    '-o' => \$flags,
    '-f' => \$fasta,
    '-d' => \$ff_data,
    '-n' => \$max_seqs,
    '-N' => \$max_chars,
    '-j' => \$jobdir,
);

# Consume leading switches from @ARGV; stop at the first non-switch argument.
while (@ARGV)
{
    my $opt = shift;
    if (my $target = $target_for{$opt})
    {
        # A switch given as the very last argument has no value to consume.
        @ARGV or die "Option $opt requires a value. Usage: $usage\n";
        $$target = shift;
    }
    elsif ($opt =~ /^-/)
    {
        die "Invalid option $opt. Usage: $usage\n";
    }
    else
    {
        # First positional argument: put it back and leave the rest in @ARGV.
        unshift(@ARGV, $opt);
        last;
    }
}

# The program name is always appended to the flag string.
$flags = "$flags -p $blastprog";

# At least one positional (input file) argument is required.
if (@ARGV == 0)
{
    die "Usage: $usage\n";
}

# Everything left in @ARGV after option parsing is an input file to chunk.
my @figfam_output_files = @ARGV;

for my $f (@figfam_output_files)
{
    -f $f or die "FIGfam output file $f does not exist\n";
}

if (!defined($max_seqs) and !defined($max_chars))
{
    die "Must pass either -n or -N option\n";
}

# A chunk limit that is zero or not a number cannot bound a chunk sensibly,
# so reject it up front.
for my $limit (['-n', $max_seqs], ['-N', $max_chars])
{
    my ($switch, $value) = @$limit;
    next unless defined($value);
    ($value =~ /^\d+$/ and $value > 0)
        or die "Value for $switch must be a positive integer, got '$value'\n";
}

# Without this check a missing -j surfaces as "Cannot mkdir : ...".
(defined($jobdir) and $jobdir ne '')
    or die "Must pass the -j jobdir option. Usage: $usage\n";

-d $jobdir or mkdir $jobdir or die "Cannot mkdir $jobdir: $!\n";

# Task numbering state shared with write_task(): the next id to hand out and
# the id of the most recently written task.
my $next_task = 1;
my $last_task;

# Layout of the job directory.
my $task_file = "$jobdir/task.list";
my $input_dir = "$jobdir/sims.in";
my $output_dir = "$jobdir/sims.raw";
my $error_dir = "$jobdir/sims.err";

for my $dir ($input_dir, $output_dir, $error_dir)
{
    -d $dir or mkdir $dir or die "Cannot mkdir $dir: $!\n";
}

# The task list records these paths, so make them absolute.
$input_dir = abs_path($input_dir);
$output_dir = abs_path($output_dir);
$error_dir = abs_path($error_dir);

# TASK stays a package filehandle because write_task() prints to it by name.
open(TASK, '>', $task_file) or die "Cannot write $task_file: $!";

# Split every input file into chunks and emit one task per chunk.
#
# Lines are accumulated until the current chunk holds $max_seqs lines (-n) or
# at least $max_chars characters (-N, newlines included); the chunk is then
# written out via write_task() and a new one is started.  Chunks never span
# two input files.
for my $file (@figfam_output_files)
{
    my $cur_size = 0;      # characters accumulated in the current chunk
    my $cur_count = 0;     # lines accumulated in the current chunk
    my $cur_input = '';    # text of the current chunk

    my $base = basename($file);
    my $path = abs_path($file);

    open(my $fh, '<', $path) or die "Cannot open $path: $!\n";

    print "Chunk file $path\n";

    while (my $line = <$fh>)
    {
        my $chunk_full =
            (defined($max_seqs) && $cur_count >= $max_seqs)
            || (defined($max_chars) && $cur_size >= $max_chars);

        # Flush before appending so the line that trips a limit starts the
        # next chunk.  Never flush a chunk that has no lines in it.
        if ($cur_count > 0 and $chunk_full)
        {
            write_task($base, $ff_data, $fasta, $input_dir, $output_dir, $error_dir, $cur_input);
            $cur_size = 0;
            $cur_count = 0;
            $cur_input = '';
        }
        $cur_input .= $line;
        $cur_size += length($line);
        $cur_count++;
    }

    # Write the trailing partial chunk.  An empty input file has nothing to
    # write, so it produces no task at all.
    if ($cur_count > 0)
    {
        write_task($base, $ff_data, $fasta, $input_dir, $output_dir, $error_dir, $cur_input);
    }
    else
    {
        print STDERR "Warning: $path is empty; no task generated for it\n";
    }
    close($fh);
}

# Buffered write errors on the task list only surface at close time.
close(TASK) or die "Cannot close $task_file: $!\n";

# Report the range of task ids written; 1..0 means no task was generated.
print "tasks\t1\t" . (defined($last_task) ? $last_task : 0) . "\n";

#
# Write one input chunk to disk and append its work-unit line to the task list.
#
# Arguments:
#   $base        name of the per-input-file subdirectory created under each of
#                the three directories below
#   $ff_data     value recorded verbatim in the task line
#   $fasta_file  value recorded verbatim in the task line
#   $input_dir   directory under which the chunk file in.<task> is written
#   $output_dir  directory under which the out.<task> path is reserved
#   $error_dir   directory under which the err.<task> path is reserved
#   $chunk       text of the chunk
#
# The task id is taken from the file-level counter $next_task, and $last_task
# is updated to it.  The line appended to the TASK filehandle holds these
# tab-separated fields: task id, chunk file, flags, ff_data, fasta_file,
# output path, error path.  Only the chunk file is created here; the output
# and error paths are merely recorded.
#
# Dies if a directory cannot be created or any write fails.  Returns the
# task id.
#
sub write_task
{
    my($base, $ff_data, $fasta_file, $input_dir, $output_dir, $error_dir, $chunk) = @_;

    my $task = $next_task++;

    my $idir = "$input_dir/$base";
    my $odir = "$output_dir/$base";
    my $edir = "$error_dir/$base";

    for my $dir ($idir, $odir, $edir)
    {
        -d $dir or mkdir($dir) or die "Cannot mkdir $dir: $!\n";
    }

    my $in = "$idir/in.$task";
    my $out = "$odir/out.$task";
    my $err = "$edir/err.$task";

    # Check print and close as well as open: with buffered output a full disk
    # is often only reported when the handle is closed.
    open(my $in_fh, '>', $in) or die "Cannot write $in: $!";
    print $in_fh $chunk or die "Cannot write $in: $!";
    close($in_fh) or die "Cannot close $in: $!";

    print(TASK join("\t", $task, $in, $flags, $ff_data, $fasta_file, $out, $err), "\n")
        or die "Cannot write task $task to task list: $!";

    $last_task = $task;
    return $task;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3