[Bio] / FortyEightMeta / resume_after_fixed_fasta.pl Repository:
ViewVC logotype

View of /FortyEightMeta/resume_after_fixed_fasta.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Tue May 27 23:41:17 2008 UTC (11 years, 10 months ago) by redwards
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_rel_2008_0806, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_rel_2008_0923, mgrast_release_3_0, mgrast_dev_03252011, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, mgrast_rel_2008_0919, mgrast_rel_2008_1110, myrast_33, mgrast_rel_2008_0917, mgrast_dev_04052011, mgrast_dev_02222011, HEAD
Changes since 1.1: +2 -1 lines
just adding a test that the file was found

#__perl__
#
# This will resume after the reformat contigs step, so that you can correct the fna file in the raw directory, run this, and then mark preprocess as complete. Is easier than
# rerunning whole stage, especially when jobs have multiple fasta files in a tar file.
#
# Please note, you may need to delete the proc directory before you proceed, it will be replaced here
#

use strict;
use FIG;
use Data::Dumper;
use FIG_Config;
use File::Basename;
use File::Copy;
use GenomeMeta;
use Job48;
use Carp;

my $STAGE = "preprocess";

@ARGV == 1 or die "Usage: $0 job-dir\n";

my $jobdir = shift;

-d $jobdir or die "$0: job dir $jobdir does not exist\n";

my $job_id = basename($jobdir);
my $job = new Job48($jobdir);

$job or die "Cannot create job for $job_id\n";

my $meta = $job->meta;

print "Running job! $jobdir\n";

$job->meta->set_metadata("status.$STAGE", "in_progress");

#
# Find needed executables
#

my $renumber_exe = "$FIG_Config::bin/renumber_sequences";
unless (-x $renumber_exe) {$renumber_exe = "/vol/metagenome-48-hour/FIGdisk.backend.prod/dist/releases/dev/cee/bin/renumber_sequences"}
-x $renumber_exe or &fatal("Executable missing: $renumber_exe");

my $count_fasta_exe = "$FIG_Config::bin/countfastachars";
unless (-x $count_fasta_exe) {$count_fasta_exe = "/vol/metagenome-48-hour/FIGdisk.backend.prod/dist/releases/dev/cee/bin/countfastachars"}
-x $count_fasta_exe or &fatal("Executable missing: $count_fasta_exe");

#
# Create directories if needed
#

chdir($jobdir) or &fatal("cannot chdir $jobdir: $!");

-d "raw" or mkdir "raw", 0777 or &fatal("cannot mkdir raw in $jobdir: $!");
-d "proc" or mkdir "proc", 0777 or &fatal("cannot mkdir proc in $jobdir: $!");
-d "sge_output" or mkdir "sge_output", 0777 or &fatal("cannot mkdir sge_output in $jobdir: $!");

#
# Extract data into the raw dir
#


my $tar = $meta->get_metadata("source_archive");
my $fasta = $meta->get_metadata("source_fasta");
my $fna = $meta->get_metadata("source_file");

# we'll figure out from the file names
my ($fna_file, $qual_file);
opendir(DIR, "$jobdir/raw/") || die "Can't open $jobdir/raw/";
foreach my $filetest (grep {$_ !~ /^\./} readdir(DIR))
{
	if (-e "$jobdir/raw/$filetest.unformatted") {$fna_file = $filetest}
	if ($filetest =~ /qual$/) { $qual_file = $filetest}
}

unless ($fna_file && -e "$jobdir/raw/$fna_file") {&fatal("Cannot find a fasta file in $jobdir/raw Expected to find '$fna_file'")}
print STDERR "Working on FNA: $fna_file QUAL: $qual_file\n";


print "Found fna=$fna_file qual=$qual_file\n";



#
# Process data.
#

chdir "$jobdir/proc" or &fatal("cannot chdir to $jobdir/proc: $!");

my @args;
push @args, "-f", "$jobdir/raw/$fna_file";
push @args, "-q", "$jobdir/raw/$qual_file" if $qual_file;
push @args, "-g", $job->genome_name();
print "Run $renumber_exe @args\n";
@args = map { "'$_'" } @args;
my $rc = system("$renumber_exe @args > renumber.stdout 2> renumber.stderr");
$rc == 0 or &fatal("Failed with rc=$rc: $renumber_exe @args");

#
# Determine genome #.
#

my $gnum = $meta->get_metadata("$STAGE.genome_num");
my $snum = $meta->get_metadata("$STAGE.sequence_num");

&fatal("can't extract genome number and sequence number. Please restart preprocessing from the beginning") unless ($gnum & $snum);

#
# While we are here, set up the taxonomy.
#
unless (-e "$jobdir/TAXONOMY")
{
	open(T, ">$jobdir/TAXONOMY") or &fatal("Cannot write $jobdir/TAXONOMY: $!");
	print T "unclassified; environmental samples; " . $job->genome_name() . "\n";
	close(T);
}


my $new_fa = "$gnum.fa";
my $new_qual = "$gnum.qual";

-f $new_fa or &fatal("Cannot find new fasta file $new_fa");
-f $new_qual or warn "Cannot find new qual file $new_qual";

$meta->set_metadata("$STAGE.genome_num", $gnum);
$meta->set_metadata("$STAGE.sequence_num", $snum);

$meta->set_metadata("$STAGE.fasta_file", "$jobdir/proc/$new_fa");
$meta->set_metadata("$STAGE.qual_file", "$jobdir/proc/$new_qual") if $new_qual;

#
# Now we can count.
#

&run("$count_fasta_exe -m $jobdir/meta.xml -k $STAGE.count_raw -s $jobdir/raw/$fna_file > count_raw.stdout 2> count_raw.stderr");

&run("$count_fasta_exe -m $jobdir/meta.xml -k $STAGE.count_proc -s $new_fa > count_proc.stdout 2> count_proc.stderr");

$job->meta->set_metadata("status.$STAGE", "complete");
$job->meta->set_metadata("$STAGE.running", "no");

sub create_and_register_genome_id
{
    my($gnum) = @_;

    my $tax = sprintf("444%04d", $gnum);
    my $proxy = SOAP::Lite-> uri('http://www.soaplite.com/Scripts')-> proxy($FIG_Config::clearinghouse_url);
    
    my $response = $proxy->register_genome($tax);
    if ($response->fault) {	
      die "Failed to deposit:	 " . $response->faultcode . " " . $response->faultstring;
    }	

    my $vers = $response->result;

    return "$tax.$vers";
}

sub run
{
    my(@cmd) = @_;

    print "Run @cmd\n";
    my $rc = system(@cmd);
    if ($rc != 0)
    {
	&fatal("Failed with rc=$rc: @cmd");
    }
}

sub fatal
{
    my($msg) = @_;

    $meta->add_log_entry($0, ['fatal error', $msg]);
    $meta->set_metadata("status.$STAGE", "error");

    croak "$0: $msg";
}
    


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3