[Bio] / FortyEightMeta / mg_compute_sims_on_timelogic.pl Repository:
ViewVC logotype

View of /FortyEightMeta/mg_compute_sims_on_timelogic.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (download) (as text) (annotate)
Thu Oct 11 16:37:35 2007 UTC (12 years, 7 months ago) by mkubal
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_rel_2008_0806, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_rel_2008_0923, mgrast_release_3_0, mgrast_dev_03252011, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, mgrast_rel_2008_0919, mgrast_rel_2008_1110, myrast_33, mgrast_rel_2008_0917, mgrast_dev_04052011, mgrast_dev_02222011, HEAD
Changes since 1.8: +11 -6 lines
-priority 5, records SOC job to meta.xml

use FIG;
use strict;
use raelib;
use warnings;
use FIG_Config;
use File::Basename;
use GenomeMeta;

use DBMaster;
use Job48;

my $STAGE = "sims";

my $fig = new FIG;

# this defines the number of sequences for each job. 
use constant BIOBLASTA => 10000;
use constant BIOINTEL  => 2000;

my $STAGE = "sims";

@ARGV == 1 or die "Usage: $0 job-dir\n";

my $jobdir = shift;

-d $jobdir or die "$0: job dir $jobdir does not exist\n";

my $job_id = basename($jobdir);
my $job = new Job48($job_id);

my $meta = $job->meta;

print "Running job! $jobdir\n";

$job->meta->set_metadata("status.$STAGE", "in_progress");

my $proc = "$jobdir/proc";
chdir($proc);

my $fasta = $meta->get_metadata("preprocess.fasta_file");


my $tempf;

my @databases = ("nr_2007_0208","16s","lsu","gg","ssu");

foreach my $db (@databases){
    
    if($db eq "nr_2007_0208"){ $tempf = "/vol/metagenome-48-hour/Data/tblastx-template-nr";}
    else{$tempf = "/vol/metagenome-48-hour/Data/tblastn-template-others";}

    my $fa=raelib->read_fasta("$fasta"); # this is a generic method of reading a fasta file into a ref to a hash
    my @keys=keys %$fa;
    
    my @jobs;
    while (@keys)
    {
	# remove however many sequences we want to submit
	my @subset=splice(@keys, 0, BIOINTEL);
	
	# write them to a temporary file
	open(OUT, ">sequences_to_blast") || die "Can't write to sequences_to_blast";
	map {print OUT ">$_\n",$fa->{$_},"\n"} @subset;
	close OUT;
	
	my $tl_job = `/decypher/cli/bin/dc_template -query sequences_to_blast -template $tempf -targ $db -priority 5`; 
	$tl_job =~ s/^OK1\s+//;
	chomp($tl_job);
	push(@jobs,$tl_job);
	$job->meta->set_metadata("$tl_job", "submitted");
    }
    
    unlink "sequences_to_blast";
    
   #check status of jobs, 
    
    my $jobs_not_done = 1;
    my %completed_jobs;
    while ($jobs_not_done){
	open(RUN,"/decypher/cli/bin/dc_qstatus |");
	my $record_jobs = 0;
	while($_ = <RUN>){
	    if($_ =~/SOC/){
		if($record_jobs){
		    chomp($_);
		    $completed_jobs{$_} = 1;
		}
	    }
	    if($_ =~/Completed/){
		$record_jobs = 1;
		
	    }
	    if($_ =~/Running/){
		$record_jobs = 0;
	    }
	    
	}
	close(RUN);
    
	$jobs_not_done = 0;
	foreach my $tl_job (@jobs){
	    if(!$completed_jobs{$tl_job}){
		$jobs_not_done = 1;
	    }
	    else{
		$job->meta->set_metadata("$tl_job", "completed");
	    }
	}
	sleep(60);
    }
    
   #once all jobs finish, parse results and write to $jobdir/proc 
    
    my $prefix;
    my $raw_sims;
    my $expanded_sims;
    
    if($db eq "nr_2007_0208"){
	$prefix = "seed";
	$expanded_sims = "sims.".$prefix.".raw.exp";
	open(EXP,">$expanded_sims");
    }
    else{$prefix = $db;}
    
    $raw_sims = "sims.".$prefix.".raw";
    open(RAW,">$raw_sims");

    #use sims hash to avoid duplicate entries
    my %sims;
    foreach my $job (@jobs){
	open(IN,"/decypher/output/$job.out");
	while($_ = <IN>){
	    #trim header
	    if($_ !~/QUERY/){
		if(! $sims{$_}){
		    my @fields =split("\t",$_);
		    my $primary = $fields[1];
		    if(! $primary){next}
		    print RAW "$_";
		    if($db eq "nr_2007_0208"){
			if($primary =~/fig/){
			    print EXP "$_";
			}
			else{
			    my @other_ids = $fig->mapped_prot_ids($primary);
			    foreach my $set (@other_ids){
				my($id,$len) = @$set;
				if($id !~/xxx/){
				    $fields[1] = $id;
				    my $temp_string = join("\t",@fields);
				    print EXP "$temp_string";
				}
			    }
			}
		    }
		    $sims{$_} = 1;
		}
	    }
	}
	close(IN);
    }

}

$job->meta->set_metadata("status.$STAGE", "complete");
$job->meta->set_metadata("$STAGE.running", "no");

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3