[Bio] / FortyEight / rp_chunk_sims.pl Repository:
ViewVC logotype

View of /FortyEight/rp_chunk_sims.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Nov 22 21:01:42 2006 UTC (13 years, 2 months ago) by olson
Branch: MAIN
48hr mods

#
# Chunk a fasta file into pieces suitable for cluster BLAST calculations.
#
# We are provided the NR and peg.synonyms files that should be used for this.
#
# Usage: rp_chunk_sims fasta-file nr peg.synonyms sims-job-dir
#
# We write a file task.list into sims-job-dir that contains the list of work units.
#
# The work units will write raw sims into sims-job-dir/sims.raw
#

use strict;
use File::Basename;
use Cwd 'abs_path';

@ARGV == 4 or die "Usage: $0 fasta-file nr peg.synonyms sims-job-dir\n";

my $fasta = shift;
my $nr_file = shift;
my $pegsyn = shift;	# Required on the command line; not referenced by the code below.
my $jobdir = shift;

-d $jobdir or mkdir $jobdir or die "Cannot mkdir $jobdir: $!\n";

#
# Chunk size threshold, in characters of fasta text (headers and newlines
# included). A chunk is only closed when a new sequence header is reached,
# so a sequence is never split and a chunk may exceed this size.
#
my $max_size = 20_000;

#
# Task numbering state shared with write_task(): $next_task is the number the
# next chunk will get, $last_task the number of the most recent chunk written
# (0 while no chunk has been written).
#
my $next_task = 1;
my $last_task = 0;

my $task_file = "$jobdir/task.list";
my $input_dir = "$jobdir/sims.in";
my $output_dir = "$jobdir/sims.raw";
my $error_dir = "$jobdir/sims.err";

-d $input_dir or mkdir $input_dir or die "Cannot mkdir $input_dir: $!\n";
-d $output_dir or mkdir $output_dir or die "Cannot mkdir $output_dir: $!\n";
-d $error_dir or mkdir $error_dir or die "Cannot mkdir $error_dir: $!\n";

#
# Copied verbatim into every task line by write_task().
# NOTE(review): these look like blastall options; confirm against the worker
# that consumes task.list.
#
my $flags = "-m 8 -e 1.0e-5 -FF -p blastp";

my @fasta_files = ($fasta);

#
# TASK stays a bareword handle because write_task() prints to it by name.
#
open(TASK, '>', $task_file) or die "Cannot write $task_file: $!";

#
# Buzz through once to ensure we can open them, so that we fail before
# writing any chunk if an input is unreadable.
#
for my $file (@fasta_files)
{
    open(my $probe, '<', $file) or die "Cannot open $file: $!\n";
    close($probe);
}

for my $file (@fasta_files)
{
    my $cur_size = 0;
    my $cur_input = '';

    # The basename names the per-file subdirectories created by write_task().
    my $base = basename($file);
    my $path = abs_path($file);
    defined $path or die "Cannot resolve path of $file: $!\n";

    open(my $in_fh, '<', $path) or die "Cannot open $path: $!\n";

    print "Chunk file $path\n";

    while (my $line = <$in_fh>)
    {
        #
        # Only a header line may close the current chunk; the header itself
        # then becomes the first line of the next chunk.
        #
        if ($line =~ /^>/ and $cur_size >= $max_size)
        {
            write_task($base, $input_dir, $output_dir, $error_dir, $cur_input);
            $cur_size = 0;
            $cur_input = '';
        }
        $cur_input .= $line;
        $cur_size += length($line);
    }
    close($in_fh);

    #
    # Flush the final partial chunk. An empty input file leaves nothing to
    # flush, and must not produce an empty work unit.
    #
    if ($cur_size > 0)
    {
        write_task($base, $input_dir, $output_dir, $error_dir, $cur_input);
    }
}

# Buffered write errors on the task list only surface at close.
close(TASK) or die "Cannot close $task_file: $!\n";

$last_task or warn "No sequence data found in @fasta_files; no tasks written\n";

print "tasks\t1\t$last_task\n";

#
# Write one chunk of fasta text to its own input file and append the matching
# work-unit line to the task list.
#
# Arguments:
#   $base       - name of the subdirectory used under each of the three
#                 directories below; it is created on demand
#   $input_dir  - directory under which the chunk is written as $base/in.N
#   $output_dir - directory under which the task's output path $base/out.N is named
#   $error_dir  - directory under which the task's error path $base/err.N is named
#   $fasta      - the fasta text making up this chunk
#
# N is taken from the file-level counter $next_task, which is incremented;
# $last_task is set to N once the task line has been written.
#
# The line written to the TASK handle is tab-separated:
#   task number, input file, $nr_file, $flags, output file, error file
#
# Only the input file is created here; the output and error paths are just
# recorded in the task line.
#
# Dies if a directory cannot be created or if any write fails.
#
sub write_task
{
    my($base, $input_dir, $output_dir, $error_dir, $fasta) = @_;

    my $task = $next_task++;

    my $idir = "$input_dir/$base";
    my $odir = "$output_dir/$base";
    my $edir = "$error_dir/$base";

    for my $dir ($idir, $odir, $edir)
    {
        -d $dir or mkdir($dir) or die "Cannot mkdir $dir: $!\n";
    }

    my $in = "$idir/in.$task";
    my $out = "$odir/out.$task";
    my $err = "$edir/err.$task";

    #
    # Three-argument open so that nothing in the path (which includes the
    # fasta file's basename) can be interpreted as an open mode.
    #
    open(my $chunk_fh, '>', $in) or die "Cannot write $in: $!";
    print {$chunk_fh} $fasta or die "Cannot write $in: $!";

    # A short write (full disk, quota) may only be reported at close.
    close($chunk_fh) or die "Cannot write $in: $!";

    print TASK join("\t", $task, $in, $nr_file, $flags, $out, $err), "\n"
        or die "Cannot write task $task to task list: $!";
    $last_task = $task;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3