[Bio] / FortyEightMeta / resume_after_fixed_fasta.pl Repository:
ViewVC logotype

View of /FortyEightMeta/resume_after_fixed_fasta.pl

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.1 - (download) (as text) (annotate)
Sun Mar 16 18:27:02 2008 UTC (12 years, 2 months ago) by redwards
Branch: MAIN
adding script for resuming preprocessing after fixing a fasta file

# This will resume after the reformat contigs step, so that you can correct the fna file in the raw directory, run this, and then mark preprocess as complete. Is easier than
# rerunning whole stage, especially when jobs have multiple fasta files in a tar file.
# Please note, you may need to delete the proc directory before you proceed, it will be replaced here

use strict;
use FIG;
use FIG_Config;
use File::Basename;
use File::Copy;
use GenomeMeta;
use Job48;
use Carp;

my $STAGE = "preprocess";

@ARGV == 1 or die "Usage: $0 job-dir\n";

my $jobdir = shift;

-d $jobdir or die "$0: job dir $jobdir does not exist\n";

my $job_id = basename($jobdir);
my $job = new Job48($jobdir);

$job or die "Cannot create job for $job_id\n";

my $meta = $job->meta;

print "Running job! $jobdir\n";

$job->meta->set_metadata("status.$STAGE", "in_progress");

# Find needed executables

my $renumber_exe = "$FIG_Config::bin/renumber_sequences";
unless (-x $renumber_exe) {$renumber_exe = "/vol/metagenome-48-hour/FIGdisk.backend.prod/dist/releases/dev/cee/bin/renumber_sequences"}
-x $renumber_exe or &fatal("Executable missing: $renumber_exe");

my $count_fasta_exe = "$FIG_Config::bin/countfastachars";
unless (-x $count_fasta_exe) {$count_fasta_exe = "/vol/metagenome-48-hour/FIGdisk.backend.prod/dist/releases/dev/cee/bin/countfastachars"}
-x $count_fasta_exe or &fatal("Executable missing: $count_fasta_exe");

# Create directories if needed

chdir($jobdir) or &fatal("cannot chdir $jobdir: $!");

-d "raw" or mkdir "raw", 0777 or &fatal("cannot mkdir raw in $jobdir: $!");
-d "proc" or mkdir "proc", 0777 or &fatal("cannot mkdir proc in $jobdir: $!");
-d "sge_output" or mkdir "sge_output", 0777 or &fatal("cannot mkdir sge_output in $jobdir: $!");

# Extract data into the raw dir

my $tar = $meta->get_metadata("source_archive");
my $fasta = $meta->get_metadata("source_fasta");
my $fna = $meta->get_metadata("source_file");

# we'll figure out from the file names
my ($fna_file, $qual_file);
opendir(DIR, "$jobdir/raw/") || die "Can't open $jobdir/raw/";
foreach my $filetest (grep {$_ !~ /^\./} readdir(DIR))
	if (-e "$jobdir/raw/$filetest.unformatted") {$fna_file = $filetest}
	if ($filetest =~ /qual$/) { $qual_file = $filetest}

unless (-e "$jobdir/raw/$fna_file") {&fatal("Cannot find a fasta file in $jobdir/raw")}
print STDERR "Working on FNA: $fna_file QUAL: $qual_file\n";

print "Found fna=$fna_file qual=$qual_file\n";

# Process data.

chdir "$jobdir/proc" or &fatal("cannot chdir to $jobdir/proc: $!");

my @args;
push @args, "-f", "$jobdir/raw/$fna_file";
push @args, "-q", "$jobdir/raw/$qual_file" if $qual_file;
push @args, "-g", $job->genome_name();
print "Run $renumber_exe @args\n";
@args = map { "'$_'" } @args;
my $rc = system("$renumber_exe @args > renumber.stdout 2> renumber.stderr");
$rc == 0 or &fatal("Failed with rc=$rc: $renumber_exe @args");

# Determine genome #.

my $gnum = $meta->get_metadata("$STAGE.genome_num");
my $snum = $meta->get_metadata("$STAGE.sequence_num");

&fatal("can't extract genome number and sequence number. Please restart preprocessing from the beginning") unless ($gnum & $snum);

# While we are here, set up the taxonomy.
unless (-e "$jobdir/TAXONOMY")
	open(T, ">$jobdir/TAXONOMY") or &fatal("Cannot write $jobdir/TAXONOMY: $!");
	print T "unclassified; environmental samples; " . $job->genome_name() . "\n";

my $new_fa = "$gnum.fa";
my $new_qual = "$gnum.qual";

-f $new_fa or &fatal("Cannot find new fasta file $new_fa");
-f $new_qual or warn "Cannot find new qual file $new_qual";

$meta->set_metadata("$STAGE.genome_num", $gnum);
$meta->set_metadata("$STAGE.sequence_num", $snum);

$meta->set_metadata("$STAGE.fasta_file", "$jobdir/proc/$new_fa");
$meta->set_metadata("$STAGE.qual_file", "$jobdir/proc/$new_qual") if $new_qual;

# Now we can count.

&run("$count_fasta_exe -m $jobdir/meta.xml -k $STAGE.count_raw -s $jobdir/raw/$fna_file > count_raw.stdout 2> count_raw.stderr");

&run("$count_fasta_exe -m $jobdir/meta.xml -k $STAGE.count_proc -s $new_fa > count_proc.stdout 2> count_proc.stderr");

$job->meta->set_metadata("status.$STAGE", "complete");
$job->meta->set_metadata("$STAGE.running", "no");

sub create_and_register_genome_id
    my($gnum) = @_;

    my $tax = sprintf("444%04d", $gnum);
    my $proxy = SOAP::Lite-> uri('http://www.soaplite.com/Scripts')-> proxy($FIG_Config::clearinghouse_url);
    my $response = $proxy->register_genome($tax);
    if ($response->fault) {	
      die "Failed to deposit:	 " . $response->faultcode . " " . $response->faultstring;

    my $vers = $response->result;

    return "$tax.$vers";

sub run
    my(@cmd) = @_;

    print "Run @cmd\n";
    my $rc = system(@cmd);
    if ($rc != 0)
	&fatal("Failed with rc=$rc: @cmd");

sub fatal
    my($msg) = @_;

    $meta->add_log_entry($0, ['fatal error', $msg]);
    $meta->set_metadata("status.$STAGE", "error");

    croak "$0: $msg";

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3