[Bio] / FortyEight / imp_process_sims.pl Repository:
ViewVC logotype

View of /FortyEight/imp_process_sims.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Sep 5 20:59:03 2007 UTC (12 years, 3 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Rollup of RAST->SEED import stuff.

#
# Postprocess computed sims. 
#
# The sim computation happens in a sims work directory (the sim-dir argument).
# The TimeLogic sim submission results in the creation of two files there:
# a task list (task.list) and a job map (job.map).
#
# task.list maps a task number to input and output files, and parameters.
#
# job.map maps an input filename to a TimeLogic job number.
#
# The task of this script is to identify the output files for all tasks, to
# ensure they exist and to do a sanity check that the majority of the input
# sequences are accounted for in the generated data. Once the sanity
# checking is complete, standard SEED postprocessing is performed and a
# flipped sims file is created.
#
# NOTE(review): as written, the script only verifies that each task's output
# file exists; no per-sequence accounting is visible here. Confirm whether
# that check (and the flipped sims file) is handled by the pipeline tools.
#

use strict;

use Data::Dumper;
use FIG;
use FIG_Config;
use File::Basename;
use File::Copy;
use ImportJob;
use GenomeMeta;
use JobStage;

# Hit limit handed to reduce_sims when the post-processing pipeline is built.
my $hits_max = 300;

@ARGV == 2 or die "Usage: $0 job-dir sim-dir\n";

# Positional arguments: the import job directory and the sims work directory.
my $jobdir = shift;
my $simdir = shift;

-d $jobdir or die "$0: job dir $jobdir does not exist\n";

# Use the direct Class->new(...) call rather than the indirect-object form
# ("new JobStage(...)"): the indirect form is parsed heuristically and can
# silently change meaning depending on which subs are in scope.
my $stage = JobStage->new('ImportJob', 'process_sims', $jobdir);

$stage or die "$0: Could not create job object";
my $job = $stage->job();

$stage->log("Running on " . $stage->hostname);

# Mark the stage as in progress before any real work starts.
$stage->set_status("running");
$stage->set_running("yes");

# Record which host is running this stage in the job metadata.
$stage->set_qualified_metadata("host", $stage->hostname);

#
# Load job map.
#
# Each line of $simdir/job.map holds two tab-separated fields. The first
# becomes a key of %jobmap and the second its value; the task-list pass
# uses the value to locate the corresponding file under $simdir/sims.
#

my %jobmap;
{
    # Three-argument open with a lexical handle: the path is never
    # interpreted as an open mode, and the handle is scoped to this block.
    open(my $jm_fh, '<', "$simdir/job.map")
        or $stage->fatal("Cannot open jobmap $simdir/job.map: $!");

    while (my $line = <$jm_fh>)
    {
        chomp $line;
        my($file, $tl_file) = split(/\t/, $line);

        # A blank line yields no fields; do not record a bogus empty key.
        next unless defined $file && length $file;

        $jobmap{$file} = $tl_file;
    }
    close($jm_fh);
}

# Load the task list.
#
# Each line of $simdir/task.list holds tab-separated fields
# (id, input file, nr, args, out, err). Every input file must have an entry
# in %jobmap (looked up by base name) and the mapped file
# $simdir/sims/<name>.out must exist. Each accepted task is stored in @tasks
# as [id, in, nr, args, out, err, simfile].
my @tasks;
{
    open(my $tl_fh, '<', "$simdir/task.list")
        or $stage->fatal("Cannot open task list $simdir/task.list: $!");

    while (my $line = <$tl_fh>)
    {
        chomp $line;

        # Blank lines carry no task. Without this guard basename() would
        # croak on an undefined path, bypassing $stage->fatal.
        next if $line =~ /^\s*$/;

        my($id, $in, $nr, $args, $out, $err) = split(/\t/, $line);

        if (!defined($in) || $in eq '')
        {
            $stage->fatal("Malformed entry in task list $simdir/task.list at line $.");
        }

        my $simfile = $jobmap{basename($in)};
        if (!$simfile)
        {
            $stage->fatal("Cannot map input file $in\n");
        }

        $simfile = "$simdir/sims/$simfile.out";
        if (! -f $simfile)
        {
            $stage->fatal("Cannot open mapped input file $simfile (for $in)");
        }

        push(@tasks, [$id, $in, $nr, $args, $out, $err, $simfile]);
    }

    # The original never closed this handle.
    close($tl_fh);
}

#
# Process sims into $simdir/processed
#
# Every task's sims file is streamed, in task-list order, into one shell
# pipeline:
#   reformat_timelogic_sims | reduce_sims | reformat_sims | split_sims
# split_sims is given $procdir and the "sims.<job id>" prefix.
#

my $procdir = "$simdir/processed";
if (-d $procdir)
{
    # Move the output of an earlier run aside under a timestamped name.
    # A failed rename is reported here instead of surfacing later as a
    # confusing mkdir error.
    my $backup = "$procdir." . time();
    rename($procdir, $backup)
        or $stage->fatal("cannot rename $procdir to $backup: $!");
}
mkdir($procdir) or $stage->fatal("cannot mkdir $procdir: $!");

my $syn = "$jobdir/peg.synonyms";
my $nr = "$jobdir/nr";

my $prefix = "sims." . $job->id();

# NOTE(review): the paths are interpolated into a shell command unquoted, so
# directories containing whitespace or shell metacharacters will break it.
my $pipeline = "reformat_timelogic_sims | ";
$pipeline .= "reduce_sims $syn $hits_max | reformat_sims $nr | split_sims $procdir $prefix";

{
    # If the pipeline exits early, the default SIGPIPE action would kill this
    # script before the failure could be recorded. With a handler installed
    # the write fails with EPIPE instead, and copy() reports it below. A
    # handler (unlike 'IGNORE') is reset on exec, so the pipeline commands
    # keep their normal SIGPIPE behaviour.
    local $SIG{PIPE} = sub { };

    open(my $pipe, '|-', $pipeline) or $stage->fatal("cannot run pipeline $pipeline: $!");

    for my $task (@tasks)
    {
        # Only the resolved sims file (last element of the task record) is
        # needed here.
        my $simfile = $task->[6];

        open(my $sim_fh, '<', $simfile) or $stage->fatal("Cannot open $simfile: $!");

        # copy() returns false on a read or write error; ignoring it would
        # silently produce truncated output.
        copy($sim_fh, $pipe)
            or $stage->fatal("Error copying $simfile to pipeline $pipeline: $!");
        close($sim_fh);
    }

    # close() on a piped handle waits for the command and returns false if it
    # exited non-zero; the exit status is then in $?.
    close($pipe) or $stage->fatal("Error closing pipeline $pipeline: \$!=$! \$?=$?");
}

# Reached only after the pipeline has been closed: log completion, then clear
# the running flag and mark the stage complete.
$stage->log('completed');
$stage->set_running('no');
$stage->set_status('complete');

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3