[Bio] / FortyEightMeta / mg_compute_sims_on_timelogic.pl Repository:
ViewVC logotype

View of /FortyEightMeta/mg_compute_sims_on_timelogic.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (as text) (annotate)
Mon Oct 1 20:42:51 2007 UTC (12 years, 7 months ago) by mkubal
Branch: MAIN
Changes since 1.3: +6 -3 lines
set stage to done when finished

use FIG;
use strict;
use raelib;
use warnings;
use FIG_Config;
use File::Basename;
use GenomeMeta;

use DBMaster;
use Job48;

my $STAGE = "sims";

my $fig = new FIG;

# this defines the number of sequences for each job. 
use constant BIOBLASTA => 10000;
use constant BIOINTEL  => 2000;

my $STAGE = "sims";

@ARGV == 1 or die "Usage: $0 job-dir\n";

my $jobdir = shift;

-d $jobdir or die "$0: job dir $jobdir does not exist\n";

my $job_id = basename($jobdir);
my $job = new Job48($job_id);

my $meta = $job->meta;

print "Running job! $jobdir\n";

$job->meta->set_metadata("status.$STAGE", "in_progress");

my $proc = "$jobdir/proc";
chdir($proc);

my $fasta = $meta->get_metadata("preprocess.fasta_file");


my $tempf = "/vol/metagenome-48-hour/Data/tblastx-template";
#my $tempf = "/home/mkubal/TimeLogic_Templates/tblastx-template";

my @databases = ("nr_2007_0208","16s","lsu","gg");

foreach my $db (@databases){

    my $fa=raelib->read_fasta("$fasta"); # this is a generic method of reading a fasta file into a ref to a hash
    my @keys=keys %$fa;
    
    my @jobs;
    while (@keys)
    {
	# remove however many sequences we want to submit
	my @subset=splice(@keys, 0, BIOINTEL);
	
	# write them to a temporary file
	open(OUT, ">sequences_to_blast") || die "Can't write to sequences_to_blast";
	map {print OUT ">$_\n",$fa->{$_},"\n"} @subset;
	close OUT;
	
	my $job = `/decypher/cli/bin/dc_template -query sequences_to_blast -template $tempf -targ $db`; 
	$job =~ s/^OK1\s+//;
	chomp($job);
	push(@jobs,$job);
    }
    
    unlink "sequences_to_blast";
    
   #check status of jobs, 
    
    my $jobs_not_done = 1;
    my %completed_jobs;
    while ($jobs_not_done){
	open(RUN,"/decypher/cli/bin/dc_qstatus |");
	my $record_jobs = 0;
	while($_ = <RUN>){
	    if($_ =~/SOC/){
		if($record_jobs){
		    chomp($_);
		    $completed_jobs{$_} = 1;
		}
	    }
	    if($_ =~/Completed/){
		$record_jobs = 1;
	    }
	    if($_ =~/Running/){
		$record_jobs = 0;
	    }
	    
	}
	close(RUN);
    
	$jobs_not_done = 0;
	foreach my $job (@jobs){
	    if(!$completed_jobs{$job}){
		$jobs_not_done = 1;
	    }
	}
	sleep(60);
    }
    
   #once all jobs finish, parse results and write to $jobdir/proc 
    
    my $prefix;
    my $raw_sims;
    my $expanded_sims;
    
    if($db eq "nr_2007_0208"){
	$prefix = "seed";
	$expanded_sims = "sims.".$prefix.".raw.exp";
	open(EXP,">$expanded_sims");
    }
    else{$prefix = $db;}
    
    $raw_sims = "sims.".$prefix.".raw";
    open(RAW,">$raw_sims");

    #use sims hash to avoid duplicate entries
    my %sims;
    foreach my $job (@jobs){
	open(IN,"/decypher/output/$job.out");
	while($_ = <IN>){
	    #trim header
	    if($_ !~/QUERY/){
		if(! $sims{$_}){
		    my @fields =split("\t",$_);
		    my $primary = $fields[1];
		    if(! $primary){next}
		    print RAW "$_";
		    if($db eq "nr_2007_0208"){
			if($primary =~/fig/){
			    print EXP "$_";
			}
			else{
			    my @other_ids = $fig->mapped_prot_ids($primary);
			    foreach my $set (@other_ids){
				my($id,$len) = @$set;
				if($id !~/xxx/){
				    $fields[1] = $id;
				    my $temp_string = join("\t",@fields);
				    print EXP "$temp_string";
				}
			    }
			}
		    }
		    $sims{$_} = 1;
		}
	    }
	}
	close(IN);
    }

    $job->meta->set_metadata("status.$STAGE", "complete");
    $job->meta->set_metadata("$STAGE.running", "no");

}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3