[Bio] / FortyEightMeta / get_sim_status.pl Repository:
ViewVC logotype

View of /FortyEightMeta/get_sim_status.pl

Parent Directory | Revision Log


Revision 1.3 - (download) (as text) (annotate)
Fri Apr 3 05:38:24 2009 UTC (10 years, 11 months ago) by redwards
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_release_3_0, mgrast_dev_03252011, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, mgrast_dev_04052011, mgrast_dev_02222011, HEAD
Changes since 1.2: +1 -1 lines
changing the user object accessors

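#
# Check on the status of the sims runs for a job directory.
#
# By default this only reports: tasks that are still known to SGE are
# listed with their queue state, and tasks that are neither finished nor
# known to SGE are listed as needing a rerun. With -resubmit those tasks
# are submitted to SGE again. With -force every task that is not marked
# complete is treated as needing a rerun, whatever SGE says about it.
#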

use strict;
use FIG;
use FIG_Config;
use File::Basename;
use File::Spec;
use Data::Dumper;
use GenomeMeta;
use Job48;
use JobStage;
use SGE;
use FortyEightMeta::SimDB;
use FortyEightMeta::SimStatusDB;

# Name of the pipeline stage this script reports on and updates.
my $STAGE = "sims";

# Report-only mode is the default; -resubmit turns it off.
my $show_only = 1;

my $usage = "Usage: $0 [-resubmit] [-force] job-dir (force will resubmit unless marked complete)\n";

# Accept the job directory plus at most two option flags.
(@ARGV >= 1 and @ARGV <= 3) or die $usage;

# -force: treat every task that is not marked complete as needing a rerun.
my $force;

# Everything before the final argument has to be a recognized flag.
while (@ARGV > 1)
{
    my $opt = shift @ARGV;

    if ($opt eq '-resubmit')
    {
        $show_only = 0;
    }
    elsif ($opt eq '-force')
    {
        $force = 1;
    }
    else
    {
        die $usage;
    }
}

my $jobdir = shift @ARGV;

-d $jobdir or die "$0: job dir $jobdir does not exist\n";

#
# The script later changes directory into the job's proc directory while
# still building paths from $jobdir (SGE output directory, worker command
# line), so a relative job-dir has to be made absolute here. rel2abs does
# not resolve symlinks, so the last path component stays as the caller
# named it.
#
$jobdir = File::Spec->rel2abs($jobdir);

#
# Build the stage object for this job directory; without it nothing
# else can be logged or recorded.
#
my $stage = JobStage->new('Job48', $STAGE, $jobdir);
die "Cannot create job for $jobdir\n" unless $stage;

# Program that each resubmitted SGE task will execute.
my $compute_exe = "$FIG_Config::bin/mg_compute_sims";
unless (-x $compute_exe)
{
    $stage->fatal("Executable missing: $compute_exe");
}

# The job id is the last component of the job directory path.
my $job_id = basename($jobdir);
my $job = $stage->job();

my $meta = $job->meta;

my $sge = SGE->new;

# From here on the working directory is the job's proc directory.
my $proc = "$jobdir/proc";
unless (chdir($proc))
{
    $stage->fatal("cannot chdir $proc: $!");
}

#
# Retrieve info on the application status of the sims tasks
#

my $status_db = FortyEightMeta::SimStatusDB->new($job_id);

# Each entry is [sim job name, task number, status record].
my @all_jobs = $status_db->get_tasks_where('true');

# Sim job name => list of task numbers that have to be run again.
my %resubmit;

# Sim job name => one status record of that job. The resubmission step
# reads abbr and work_dir from it.
my %representative_record;

# Label printed for each SGE state this script recognizes.
my %state_label = (
    queued  => 'Queued',
    zombie  => 'Zombie',
    running => 'Running',
    pending => 'Pending',
);

for my $jent (@all_jobs)
{
    my($sjob, $task, $rec) = @$jent;

    # A missing status is treated as '' so no comparison ever sees undef.
    my $status = defined($rec->{status}) ? $rec->{status} : '';

    my $is_complete = ($status eq 'complete' || $status eq 'load_complete');
    my $is_loading  = ($status eq 'load_queued' || $status eq 'load_in_progress');

    #
    # Finished tasks are never rerun. Tasks whose results are being loaded
    # are left alone too, unless -force was given: force reruns everything
    # that is not marked complete.
    #
    # print "OK $sjob $task " . Dumper($rec);
    next if $is_complete;
    next if $is_loading and !$force;

    #
    # Not done; see what the sge status is. With -force the answer would
    # be ignored, so the queue is not consulted at all.
    #
    my $sge_job;
    unless ($force)
    {
        $sge_job = $sge->find_task($rec->{sim_sge_id}, $rec->{task_id});
    }

    if ($sge_job)
    {
        # SGE still knows about the task: report its state and leave it be.
        my $jstate = $sge_job->state;

        if (defined($jstate) and exists $state_label{$jstate})
        {
            print "$state_label{$jstate}: $sjob $task\n";
        }
        else
        {
            print "Unknown status $jstate $sjob $task\n";
        }
    }
    else
    {
        #
        # No record of the job in the queue (or -force was given). We
        # need to resubmit this task.
        #
        push @{$resubmit{$sjob}}, $task;
        $representative_record{$sjob} = $rec;
    }
}

# SGE job ids of every array job submitted below.
my @sge_ids;

#
# Write a backup to the job directory.
# (Switched off: the constant 0 in the condition keeps this from running.)
#

if (0 && !$show_only)
{
    my $bfile = "$jobdir/sim_status_backup." . time;
    $status_db->backup($bfile);

    $stage->log("backed up sim status to $bfile");
}

# Number of blast retries given to each resubmitted task; 3 unless configured.
my $retries = $FIG_Config::mgrast_blast_retries;
$retries = 3 unless defined($retries);

#
# Handle each sim job that has tasks to rerun. The jobs are visited in
# sorted order so that the report and the submission order are the same
# from one run to the next.
#
for my $jobtype (sort keys %resubmit)
{
    # Consecutive task numbers are merged into [start, end] ranges.
    my @sets = combine_runs($resubmit{$jobtype});
    # print "$jobtype: ". Dumper(\@sets);

    if ($show_only)
    {
        # Report only: a single task prints as "N", a range as "N-M".
        my $ranges = join("", map {
            my($first, $last) = @$_;
            $first == $last ? " $first" : " $first-$last";
        } @sets);

        print "Need to rerun $jobtype:$ranges\n";
        next;
    }

    # Settings shared by every task of this sim job.
    my $abbr = $representative_record{$jobtype}->{abbr};
    my $path = $representative_record{$jobtype}->{work_dir};

    my $jobname = "m${abbr}_$job_id";

    for my $set (@sets)
    {
        my($start, $end) = @$set;

        # print "Got tasks from $start to $end\n";
        $stage->log("Job $jobtype resubmitting tasks $start-$end");

        #
        # And submit: one SGE array job per range of task numbers.
        #
        my @sge_args = (
            "-N $jobname",
            "-v PATH",
            "-e $jobdir/sge_output",
            "-o $jobdir/sge_output",
            "-t $start-$end",
            "-b yes",
        );
        #
        # metagenome 48hr jobs get low priority
        #
        #push(@sge_args, "-l low");

        my $sge_args = join(" ", @sge_args);

        my $sge_id = eval {
            $sge->submit_job($meta, $sge_args, "$compute_exe $jobdir $path");
        };

        if ($@)
        {
            # fatal takes just the message, as in the other calls in this script.
            $stage->fatal("error starting SGE job $compute_exe $jobdir: $@\n");
        }

        #
        # Initialize sim status entries: every task in the range gets a
        # fresh record that points at the new SGE job.
        #
        for my $t ($start .. $end)
        {
            my $rec = {
                sim_sge_id         => $sge_id,
                abbr               => $abbr,
                work_dir           => $path,
                blast_retries_left => $retries,
                status             => 'not_started',
            };

            $status_db->set_task($jobtype, $t, $rec);
            #print "Would set for $t " . Dumper($rec);
        }

        push(@sge_ids, $sge_id);
    }
}

# Report-only runs end here, before any stage state is written.
exit if $show_only;

# Record the submitted SGE job ids, then mark the stage as complete
# and no longer running.
$stage->set_qualified_metadata("sge_ids", \@sge_ids);
$stage->set_status("complete");
$stage->set_running("no");

#
# Combine runs of consecutive numbers into $a-$b ranges. Return a
# list of [$start, $end] pairs in ascending order; a number with no
# neighbours comes back as [$n, $n].
#
# $list is a reference to an array of integers in any order. A value
# that occurs more than once is counted once, so no two ranges ever
# overlap. An undefined or empty list gives an empty result.
#
sub combine_runs
{
    my($list) = @_;

    $list = [] unless defined($list);

    my @out;

    for my $val (sort { $a <=> $b } @$list)
    {
        if (@out)
        {
            my $last = $out[-1];

            # Same number again: already covered by the current range.
            next if $val == $last->[1];

            # Next number in sequence: extend the current range.
            if ($val == $last->[1] + 1)
            {
                $last->[1] = $val;
                next;
            }
        }

        # Gap (or first value): start a new range.
        push(@out, [$val, $val]);
    }

    return @out;
}
1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3