[Bio] / FortyEight / rp_write_exports.pl Repository:
ViewVC logotype

View of /FortyEight/rp_write_exports.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.11 - (download) (as text) (annotate)
Wed Jun 8 20:58:35 2011 UTC (8 years, 5 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_release_3_1_2, mgrast_release_3_1_1, rast_rel_2011_0928, mgrast_dev_10262011
Changes since 1.10: +10 -0 lines
Spreadsheet export, new script for dumping queue data.

#
# Write the export files for this completed job.
#

use Data::Dumper;
use Carp;
use strict;
use FIG;
use FIG_Config;
use FileHandle;
use File::Basename;
use GenomeMeta;
use SeedExport;
use Job48;

# Exactly one argument is expected: the job directory to export.
@ARGV == 1 or die "Usage: $0 job-dir\n";

my $jobdir = shift;

-d $jobdir or die "$0: job dir $jobdir does not exist\n";

# Constructing the job object doubles as a check that the directory is
# usable as a job; the object itself is not used further in this script.
my $job = Job48->new($jobdir);
$job or die "cannot create job for $jobdir";

# Name of the machine running the export; recorded in the metadata below.
my $hostname = `hostname`;
chomp $hostname;

# The genome ID is read from the GENOME_ID file in the job directory and
# must start with <digits>.<digits>.
my $genome = &FIG::file_head("$jobdir/GENOME_ID");
chomp $genome;
$genome =~ /^\d+\.\d+/ or die "$0: Cannot find genome ID for jobdir $jobdir\n";

my $meta = GenomeMeta->new($genome, "$jobdir/meta.xml");

# Input: the processed genome directory. Output: the download directory,
# which is created via FIG::verify_dir before anything is written to it.
my $genome_dir = "$jobdir/rp/$genome";

my $export_dir = "$jobdir/download";
&FIG::verify_dir($export_dir);

# Flag the export as running; fatal() and the end of the script reset these.
$meta->set_metadata("export.hostname", $hostname);
$meta->set_metadata("export.running", "yes");
$meta->set_metadata("status.export", "in_progress");

#
# Run a final check over the genome directory before exporting.
#
# Unless the job carries the import.candidate metadata flag, the
# -no_fatal_stops option is passed so that genes with bad stops do not
# make the job fail.
#
# When keep_genecalls is set, the verification is skipped entirely and
# the status is recorded as "skipped".
#

my $keep_genecalls = $meta->get_metadata("keep_genecalls");

my $genetic_code = $meta->get_metadata("genome.genetic_code");

if ($keep_genecalls)
{
    $meta->set_metadata("genome.directory_verification_status", "skipped");
}
else
{
    # Optional switches, in the order the verifier is given them.
    my @verify_opts;
    push(@verify_opts, "-no_fatal_stops") unless $meta->get_metadata("import.candidate");
    push(@verify_opts, "-code=$genetic_code") if $genetic_code ne '';

    # The command goes through the shell so that its output can be
    # redirected into the report file.
    my $verify_cmd = join(" ",
                          "$FIG_Config::bin/verify_genome_directory",
                          @verify_opts,
                          $genome_dir,
                          ">",
                          "$jobdir/rp.errors/verify_genome_directory.report",
                          "2>&1");

    $meta->add_log_entry($0, "Verifying with command: $verify_cmd");
    my $verify_rc = system($verify_cmd);

    if ($verify_rc != 0)
    {
        # fatal() does not return.
        $meta->set_metadata("genome.directory_verification_status", "failed_$verify_rc");
        fatal("verify_genome_directory failed with rc=$verify_rc");
    }

    $meta->set_metadata("genome.directory_verification_status", "success");
    $meta->add_log_entry($0, "Verification succeeded");
}

#
# Mark possible frameshifts. This runs only when the
# correction.frameshifts metadata value is false.
#
if (!$meta->get_metadata("correction.frameshifts"))
{
    $meta->add_log_entry($0, "Computing possible frameshifts");
    #
    # correct_frameshifts appends to an existing file, so remove any
    # output left over from an earlier run. The unlink result is not
    # checked because the file may legitimately not exist.
    #
    unlink("$genome_dir/possible.frameshifts");

    my $frameshift_rc = system("$FIG_Config::bin/correct_frameshifts", "-justMark", $genome_dir);
    if ($frameshift_rc != 0)
    {
        # The exit status used to be discarded. Record the failure in
        # the job log, but keep going: the export does not depend on it.
        $meta->add_log_entry($0, "correct_frameshifts -justMark failed with rc=$frameshift_rc");
    }
}

#
# Record the job signature so an identical run can be replicated later.
#
# rp_write_exports may be run more than once for a job, so the
# signature is only generated when the file is missing or empty.
# This may not be the ideal home for the step, but it is a convenient one.
#

my $sig_file = "$jobdir/JOB_SIGNATURE";
unless (-f $sig_file && -s $sig_file)
{
    my $sig_cmd = "$FIG_Config::bin/save_job_signature $jobdir > $jobdir/rp.errors/save_job_signature.out 2>&1";
    my $sig_rc = system($sig_cmd);

    if ($sig_rc == 0)
    {
        # Log the first line of the freshly written signature file.
        my $signature = &FIG::file_head($sig_file, 1);
        chomp $signature;
        $meta->add_log_entry($0, "Job signature saved: $signature");
    }
    else
    {
        $meta->add_log_entry($0, "Error saving job signature rc$sig_rc");
    }
}


#
# Generate the GO term mappings file inside the genome directory.
# The outcome is logged either way; a failure does not stop the export.
#
my $go_cmd = "$FIG_Config::bin/write_go_mappings $genome_dir > $genome_dir/go.mappings";

# $rc stays a file-scoped variable: later steps of the script assign to it.
my $rc = system($go_cmd);

$meta->add_log_entry($0, $rc == 0 ? "wrote go mappings"
                                  : "error $rc invoking $go_cmd");


$meta->add_log_entry($0, "Writing exports to $export_dir");


# Export formats, in the order they are written.
my @export_types = ('genbank', 'GTF', 'embl', 'gff');

# Every format is written once with strip_ec off (0) and once with it on (1).
my @strip_ec_flag = (0, 1);

# Description of each format as it appears in the index file.
my %export_names = (
    genbank => "Genbank",
    GTF     => "GTF",
    embl    => "EMBL",
    gff     => "GFF3",
);

# File name extension used for each format.
my %export_suffix = (
    genbank => "gbk",
    GTF     => "gtf",
    embl    => "embl",
    gff     => "gff",
);

#
# Build the contig btree indexes if they are not there yet; having
# them speeds up a lot of the later work. A failure only produces a
# warning.
#

unless (-f "$genome_dir/contigs.btree")
{
    my @btree_cmd = ("$FIG_Config::bin/make_fasta_btree",
                     "$genome_dir/contigs",
                     "$genome_dir/contigs.btree",
                     "$genome_dir/contig_len.btree");

    my $btree_rc = system(@btree_cmd);
    warn "make_fasta_btree failed with rc $btree_rc\n" if $btree_rc != 0;
}

#
# Write every export file into $export_dir, together with an index file
# that lists them. Each index line is "<file name>\t<description>".
#
# The index is opened with a checked three-argument open on a lexical
# handle: if it cannot be created the job is failed through fatal()
# instead of silently producing exports with no index.
#
my $index_file = "$export_dir/index";
open(my $index_fh, '>', $index_file)
    or fatal("cannot open export index $index_file for writing: $!");

# Flush each entry as it is written, so the index on disk always
# reflects the exports completed so far.
$index_fh->autoflush(1);

for my $strip_ec (@strip_ec_flag)
{
    my $strip_fn_part = $strip_ec ? ".ec-stripped" : "";
    my $strip_msg = $strip_ec ? " (EC numbers stripped)" : "";
    for my $type (@export_types)
    {
        $meta->add_log_entry($0, "Exporting type $type");

        my $filename = "$genome${strip_fn_part}.$export_suffix{$type}";

        my $p = {
            virtual_genome_directory => $genome_dir,
            genome => $genome,
            directory => "$export_dir/",
            filename => "$export_dir/$filename",
            export_format => $type,
            strip_ec => $strip_ec,
        };

        # The index entry is only written when the export did not die.
        eval {
            SeedExport::export($p);
            print $index_fh "$filename\t$export_names{$type}$strip_msg\n";
        };
        if ($@)
        {
            fatal("Error exporting type $type of genome $genome to $export_dir: $@");
        }
    }
}

#
# Also export the entire genome directory as a tgz file.
# A tar failure is fatal for the export.
#

my @tar_cmd = ("tar", "-C", "$jobdir/rp", "-z", "-c", "-f", "$export_dir/$genome.tgz", $genome);
my $tar_rc = system(@tar_cmd);
if ($tar_rc != 0)
{
    fatal("error $tar_rc creating tarfile with: @tar_cmd");
}

print $index_fh "$genome.tgz\tGenome directory\n";

#
# And the fasta files. Failures here only warn, and the index entry is
# written regardless of the exit status.
#

# Fall back to genetic code 11 when the metadata does not supply one.
$genetic_code = 11 if !defined($genetic_code) || $genetic_code eq '';

my $faa_rc = system("$FIG_Config::bin/get_fasta_for_tbl_entries -code=$genetic_code $genome_dir/contigs < $genome_dir/Features/peg/tbl > $export_dir/$genome.faa");
if ($faa_rc != 0)
{
    warn "error $faa_rc writing faa export\n";
}
print $index_fh "$genome.faa\tAmino-Acid FASTA file\n";

my $fna_rc = system("$FIG_Config::bin/get_dna $genome_dir/contigs $genome_dir/Features/peg/tbl > $export_dir/$genome.fna");
if ($fna_rc != 0)
{
    warn "error $fna_rc writing fna export\n";
}
print $index_fh "$genome.fna\tNucleic-Acid FASTA file\n";

# Write errors can surface at close time, so report them.
close($index_fh) or warn "error closing export index $index_file: $!\n";

#
# Write spreadsheets.
#
# A failure is reported on STDERR but does not fail the export.
# The test used to read "$rc = 0", an assignment that is always false,
# so failures were never reported.
#

$rc = system("$FIG_Config::bin/rp_write_spreadsheets", $jobdir);
if ($rc != 0)
{
    warn "error $rc writing spreadsheets\n";
}

# Record in the job metadata that the export has finished.
$meta->set_metadata("export.running", "no");
$meta->set_metadata("status.export", "complete");

exit(0);

#
# fatal($msg)
#
# Abort the export with the given message. When the metadata object is
# available, the error is first written to the job log and the export
# status fields are set to "not running" / "error". Always croaks with
# "$0: $msg"; never returns.
#
sub fatal
{
    my $reason = shift;

    if ($meta)
    {
        $meta->add_log_entry($0, ['fatal error', $reason]);

        # Same two updates, in the same order, expressed as key/value pairs.
        for my $update (["export.running", "no"],
                        ["status.export", "error"])
        {
            $meta->set_metadata(@$update);
        }
    }

    croak "$0: $reason";
}
    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3