[Bio] / FortyEight / rp_write_exports.pl Repository:
ViewVC logotype

View of /FortyEight/rp_write_exports.pl

Parent Directory | Revision Log


Revision 1.3 - (download) (as text) (annotate)
Wed Feb 13 19:06:48 2008 UTC (12 years, 1 month ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2008_06_18, rast_rel_2008_06_16, rast_rel_2008_07_21, rast_rel_2008_04_23
Changes since 1.2: +17 -0 lines
Create contig btrees if not already there; this HUGEly accelerates download creation on large genomes.

#
# Write the export files for this completed job.
#

use Data::Dumper;
use Carp;
use strict;
use FIG;
use FIG_Config;
use FileHandle;
use File::Basename;
use GenomeMeta;
use SeedExport;
use Job48;

# Exactly one argument is expected: the job directory to export.
@ARGV == 1 or die "Usage: $0 job-dir\n";

my $jobdir = shift;

-d $jobdir or die "$0: job dir $jobdir does not exist\n";

# Use direct method-call syntax rather than the indirect-object form
# ("new Job48(...)"), which perlobj discourages because it parses
# ambiguously. In this script the job object is only used to confirm
# that one can be constructed for the directory.
my $job = Job48->new($jobdir);
$job or die "cannot create job for $jobdir";

# Name of the machine running the export; stored in the job metadata below.
my $hostname = `hostname`;
chomp $hostname;

# The genome ID is read from the job's GENOME_ID file and must begin with
# <digits>.<digits>; otherwise the job directory is unusable.
my $genome = &FIG::file_head("$jobdir/GENOME_ID");
chomp $genome;
$genome =~ /^\d+\.\d+/ or die "$0: Cannot find genome ID for jobdir $jobdir\n";

# Metadata handle for this genome, backed by the job's meta.xml file.
my $meta = GenomeMeta->new($genome, "$jobdir/meta.xml");

# Genome directory under rp/ that gets exported, and the download
# directory the export files are written to.
my $genome_dir = join("/", $jobdir, "rp", $genome);
my $export_dir = join("/", $jobdir, "download");

# presumably creates the directory when it is missing -- confirm in FIG.pm
&FIG::verify_dir($export_dir);

# Record which host is exporting and flag the export as in progress.
for my $entry (["export.hostname", $hostname],
               ["export.running",  "yes"],
               ["status.export",   "in_progress"])
{
    my ($key, $value) = @$entry;
    $meta->set_metadata($key, $value);
}

$meta->add_log_entry($0, "Writing exports to $export_dir");

# Formats to export, in the order they are written. Each format is
# produced once per entry of @strip_ec_flag (0 = keep EC numbers,
# 1 = strip them).
my @export_types  = ('genbank', 'GTF', 'embl', 'gff');
my @strip_ec_flag = (0, 1);

# Description of each format as written to the download index.
my %export_names = (
    'genbank' => "Genbank",
    'GTF'     => "GTF",
    'embl'    => "EMBL",
    'gff'     => "GFF3",
);

# File name suffix used for each format.
my %export_suffix = (
    'genbank' => "gbk",
    'GTF'     => "gtf",
    'embl'    => "embl",
    'gff'     => "gff",
);

#
# Build the contig btree indexes the first time through, when
# contigs.btree does not exist yet. Having them speeds up a lot
# of the later work.
#

unless (-f "$genome_dir/contigs.btree")
{
    my @indexer = ("$FIG_Config::bin/make_fasta_btree",
                   map { "$genome_dir/$_" } qw(contigs contigs.btree contig_len.btree));
    my $status = system(@indexer);

    # A failed index build is only reported; the export continues without it.
    warn "make_fasta_btree failed with rc $status\n" if $status != 0;
}

#
# Write every export format and record each file in the download index.
# Index lines have the form "<file name>\t<description>".
#
# The index is opened with a checked three-argument open on a lexical
# handle. An open, write or close failure goes through fatal(), so the
# job metadata is marked "error" instead of the script silently
# producing an empty or missing index and reporting success.
#
my $index_file = "$export_dir/index";
open(my $index_fh, ">", $index_file)
    or &fatal("cannot open $index_file for writing: $!");

for my $strip_ec (@strip_ec_flag)
{
    my $strip_fn_part = $strip_ec ? ".ec-stripped" : "";
    my $strip_msg = $strip_ec ? " (EC numbers stripped)" : "";
    for my $type (@export_types)
    {
        $meta->add_log_entry($0, "Exporting type $type");

        my $filename = "$genome${strip_fn_part}.$export_suffix{$type}";

        my $p = {
            virtual_genome_directory => $genome_dir,
            genome => $genome,
            directory => "$export_dir/",
            filename => "$export_dir/$filename",
            export_format => $type,
            strip_ec => $strip_ec,
        };

        # The index entry is written only after the export itself succeeded;
        # any exception from either step is reported through fatal().
        eval {
            SeedExport::export($p);
            print {$index_fh} "$filename\t$export_names{$type}$strip_msg\n"
                or die "cannot write to $index_file: $!\n";
        };
        if ($@)
        {
            &fatal("Error exporting type $type of genome $genome to $export_dir: $@");
        }
    }
}

# Push the entries written so far out to disk before the tar step below.
$index_fh->autoflush(1);

#
# Also export the entire genome directory as a tgz file.
#

my @cmd = ("tar", "-C", "$jobdir/rp", "-z", "-c", "-f", "$export_dir/$genome.tgz", $genome);
my $rc = system(@cmd);
if ($rc != 0)
{
    &fatal("error $rc creating tarfile with: @cmd");
}

print {$index_fh} "$genome.tgz\tGenome directory\n"
    or &fatal("cannot write to $index_file: $!");

# Buffered write errors surface at close, so check it as well.
close($index_fh)
    or &fatal("cannot close $index_file: $!");

# Everything was written; mark the export as finished.
$meta->set_metadata("export.running", "no");
$meta->set_metadata("status.export", "complete");

exit(0);

#
# Report an unrecoverable error and terminate the script.
#
# When the job metadata handle is available, the message is added to its
# log as a 'fatal error' entry, export.running is set to "no" and
# status.export to "error". In every case the script then croaks with
# the message prefixed by the script name.
#
sub fatal
{
    my $reason = shift;

    # No metadata handle: nothing to record, just bail out.
    unless ($meta)
    {
        croak "$0: $reason";
    }

    $meta->add_log_entry($0, ['fatal error', $reason]);
    $meta->set_metadata("export.running", "no");
    $meta->set_metadata("status.export", "error");

    croak "$0: $reason";
}
    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3