[Bio] / FigKernelScripts / seed_run_job.pl Repository:
ViewVC logotype

View of /FigKernelScripts/seed_run_job.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (as text) (annotate)
Tue Oct 10 18:37:14 2006 UTC (13 years, 1 month ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.3: +113 -25 lines
readonly fixes; seed-run-job update to run from input file

#
# Submit a job to a cluster.
#
# A job is defined by three things: a script to execute a piece of work,
# an input directory containing one piece of work per file, and an output directory
# to which the results from pieces of work are written.
#
# For each file in the input directory, the script is run with the contents of that
# file as the standard input, and the standard output is written to a file in
# the output directory with the same name as the input file.
#
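# If the input argument is a file, each line in the file is treated as input
# to an invocation of the script: the line is treated as a list of arguments
# to be passed to the script, and the output is written to a file in the
# output directory named after the line's task number. The usage message also
# lists a --stdin flag, intended to pass the line to the script on stdin
# instead, but the code does not implement it.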
#
# Each job has a spool directory created in $FIG_Config::temp. In the
# spool directory we create a task list that maps from task id to 
# the input and output files for that task. The script is also copied to this
# spool directory.
#

use strict;
use File::Basename;
use FIG_Config;
use Getopt::Long;
use File::Copy;
use Cwd 'abs_path';

my $usage = "$0 [--resource resourcename] [--queue queuename] [--stdin] script input out-dir\n";

#
# Option defaults. The queue comes from the site configuration; the resource
# is the value handed to qsub -l when the job is submitted.
#
my $queue = $FIG_Config::default_queue;

my $resource = "localdb";

#
# --stdin appears in the usage message, but no code in this script acts on it.
# It is declared here so that asking for it produces an explicit diagnostic
# instead of "Unknown option: stdin" followed by a usage line that lists it.
#
my $use_stdin;

my $res = GetOptions("queue|q=s", \$queue,
		     "resource=s", \$resource,
		     "stdin", \$use_stdin);

$res or die $usage;

$use_stdin and die "$0: the --stdin option is not implemented\n";

#
# Exactly three positional arguments are required: the script to run,
# the input (file or directory), and the output directory.
#
@ARGV == 3 or die $usage;

my $script = shift;
my $input = shift;
my $out_dir = shift;

#
# Set to 'FILE' or 'DIR' once $input has been examined.
#
my $input_type;

#
# Resolve the script to run. If the name given is not an existing file,
# look for it in each directory of $PATH and use the first match.
# Dies if no match is found, before any spool directory is created.
#
if (! -f $script)
{
    my $found;
    my $path = defined($ENV{PATH}) ? $ENV{PATH} : "";

    for my $envdir (split(/:/, $path))
    {
	#
	# An empty PATH component denotes the current directory.
	#
	my $dir = ($envdir eq "") ? "." : $envdir;

	my $s = "$dir/$script";
	if (-f $s)
	{
	    #
	    # Report before overwriting $script, so the message shows
	    # both the name that was asked for and where it was found.
	    #
	    warn "Found $script at $s\n";
	    $script = $s;
	    $found = 1;
	    last;
	}
    }

    $found or die "$script not found in path\n";
}


#
# Classify the input argument: a plain file is a 'FILE' input, a directory
# is a 'DIR' input. Anything else is an error.
#
$input_type = -f $input ? 'FILE'
	    : -d $input ? 'DIR'
	    :		  undef;

defined($input_type) or die "Input $input does not exist\n";

die "Output directory $out_dir does not exist\n" unless -d $out_dir;

#
# Work with absolute paths from here on.
#
$_ = abs_path($_) for $input, $out_dir;

#
# Create the spool directory for this job, named after this process's id.
# Failure is fatal: continuing would either reuse a stale directory left by
# an earlier job or fail later with a less helpful message.
#
my $spool = "$FIG_Config::temp/job_spool.$$";
mkdir($spool) or die "Cannot create spool directory $spool: $!\n";

print "Spool directory is $spool\n";

#
# Copy script to spool;
# create task list.
#

my $sbase = basename($script);

my $spool_script = "$spool/$sbase";
copy($script, $spool_script) or die "Cannot copy $script to $spool_script: $!\n";

#
# Make the copy executable. The list form of system() runs chmod directly,
# so a path containing spaces or shell metacharacters is passed through intact.
#
system("chmod", "+x", $spool_script) == 0
    or die "Cannot make $spool_script executable\n";

#
# Path of the task list, and the number of tasks written to it
# (filled in once the list has been generated).
#
my $tasks = "$spool/task.list";
my $n_tasks;

#
# Write the task list. Each line holds five tab-separated fields:
#
#   task-number  script  arguments  input-file  output-file
#
# For a directory input there is one task per (non-dot) file: the arguments
# field is empty and the input-file is the file itself. For a file input
# there is one task per line: the line is the arguments field, the
# input-file is /dev/null, and the output file is named by the task number.
#
# We abort rather than overwrite an output file that already exists.
#
if ($input_type eq 'DIR')
{
    my $in_dir = $input;

    opendir(my $dir_fh, $in_dir) or die "Cannot read directory $in_dir: $!\n";
    my @in_files = sort grep { !/^\./ } readdir($dir_fh);
    closedir($dir_fh);

    my $task = 1;
    open(my $task_fh, ">", $tasks) or die "Cannot open task list $tasks for writing: $!\n";
    for my $in_file (@in_files)
    {
	#
	# Check to see if we have an output file already. Abort if so, so we don't
	# overwrite old work.
	#

	my $out_file = "$out_dir/$in_file";
	if (-f $out_file)
	{
	    die "Output file $out_file already exists, aborting.\n";
	}

	print $task_fh join("\t", $task, $spool_script, "", "$in_dir/$in_file", $out_file), "\n"
	    or die "Cannot write to task list $tasks: $!\n";
	$task++;
    }
    close($task_fh) or die "Cannot close task list $tasks: $!\n";
    $n_tasks = $task - 1;
}
elsif ($input_type eq 'FILE')
{
    open(my $in_fh, "<", $input) or die "Cannot read input file $input: $!\n";

    my $task = 1;

    open(my $task_fh, ">", $tasks) or die "Cannot open task list $tasks for writing: $!\n";

    while (my $line = <$in_fh>)
    {
	chomp $line;

	#
	# Tabs in here will mess up the task file.
	#
	$line =~ s/\t/ /g;

	#
	# Check to see if we have an output file already. Abort if so, so we don't
	# overwrite old work.
	#

	my $out_file = "$out_dir/$task";
	if (-f $out_file)
	{
	    die "Output file $out_file already exists, aborting.\n";
	}

	print $task_fh join("\t", $task, $spool_script, $line, "/dev/null", $out_file), "\n"
	    or die "Cannot write to task list $tasks: $!\n";
	$task++;
    }
    close($task_fh) or die "Cannot close task list $tasks: $!\n";
    close($in_fh);
    $n_tasks = $task - 1;
}
else
{
    die "Invalid input type: this should not happen";
}

#
# With no tasks the array-job range passed to qsub would be "1-0",
# so stop here instead of submitting an empty job.
#
$n_tasks > 0 or die "No tasks found in $input, nothing to submit.\n";

#
# Submit.
#
#
# We arrange to set SEED_TASK_DIR to the spool directory so that the
# task runner can find the tasklist. 
#
# We also arrange to put job error and output to directories in the spool dir.
#

#
# Directories inside the spool that receive the job's standard output
# and standard error (passed to qsub with -o and -e below).
#
my $q_out = "$spool/q.out";
mkdir($q_out) or die "Cannot create job output directory $q_out: $!\n";

my $q_err = "$spool/q.err";
mkdir($q_err) or die "Cannot create job error directory $q_err: $!\n";

#
# Build the qsub command line. The queue is only specified when one is set.
#
my @qsub_args;
if ($queue)
{
    push(@qsub_args, "-q", $queue);
}

push(@qsub_args,
     "-l", $resource,		# in the bio* environment, we require a local database to run.
     "-b", "yes",		# Submit as binary so that it doesn't get treated as a shell script
     "-e", $q_err,
     "-o", $q_out,
     "-t", "1-$n_tasks",	# one array task per task-list entry
     "-v", "SEED_TASK_DIR=$spool",
     "-v", "SEED_ROOT=$FIG_Config::fig_disk",
     "$FIG_Config::bin/seed_run_task");

#
# Run qsub without a shell. A failed submission is reported on stderr and
# makes this script exit non-zero, so callers can detect it.
#
my $rc = system("qsub", @qsub_args);
if ($rc == -1)
{
    # system() returns -1 when the command could not be started at all.
    die "Error submitting job: cannot run qsub: $!\n";
}
elsif ($rc != 0)
{
    die "Error submitting job: rc=$rc\n";
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3