[Bio] / FortyEightMeta / mg_upgrade_preprocess.pl Repository:
ViewVC logotype

View of /FortyEightMeta/mg_upgrade_preprocess.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Sat Jan 3 23:33:06 2009 UTC (11 years, 5 months ago) by redwards
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_release_3_0, mgrast_dev_03252011, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, mgrast_dev_04052011, mgrast_dev_02222011, HEAD
two parts of the code for upgrading from an old version of mg-rast to a newer version - the preprocess and the sims

#__perl__


=pod

=head1 upgrade_metagenome

This will take a metagenome and copy most of the important information over to a new metagenome, and then run it through the upgrade steps. 

At the moment, the new job is marked as DELETE so that the automatic job checker doesn't try to process it.


The program has two options at the moment: 

 -j job number or directory. You can provide either
 -u user for the new job, in case you want to run as someone else and then let them take over later.

Please note that there is no authentication control in this. It is assumed if you are running it you can see the jobs anyway.

=head1 AUTHOR

Rob Edwards
December 11 2008

=cut

use strict;
use warnings;

use Data::Dumper;

use GenomeMeta;
use Job48;
use FIG_Config;
use ClusterStage;
use SGE;
# SGE handle; it is passed to the preprocess stage at the end of the script.
my $sge = SGE->new;


use Getopt::Std;

# The usage text is built before the options are parsed so that it can be
# shown when parsing fails.
my $usage=<<EOF;
$0
-j original job number or directory (if you use the number, we'll add the base directory $FIG_Config::mgrast_jobs)
-u new user name for the job. Otherwise will be the same as the old username

EOF

# Both -j and -u take a value. getopts returns false when it meets a switch
# it does not know, so stop with the usage text instead of carrying on with
# a partially understood command line.
my %opts;
getopts('j:u:', \%opts) or die $usage;

# Work out the directory of the original job from -j. The value is either a
# bare job number (looked up under $FIG_Config::mgrast_jobs) or an existing
# directory that contains a meta.xml file. Anything else prints the usage.
my $jobdir;
if (defined $opts{j} && $opts{j} =~ /^\d+$/) {
	$jobdir=$FIG_Config::mgrast_jobs."/".$opts{j};
}
elsif ($opts{j} && -d $opts{j} && -e "$opts{j}/meta.xml") {$jobdir=$opts{j}}
else {die $usage}

# The job id is the run of digits at the very end of the path. Trailing
# slashes are removed first so that "jobs/123/" still yields 123, and the
# capture is only used when the match actually succeeded.
$jobdir =~ s{/+$}{};
my ($jobid) = $jobdir =~ m/(\d+)$/;
unless (defined $jobid) {die "Can not work out the job id from the directory name $jobdir\n"}

unless (-e "$jobdir/meta.xml") {die "There does not appear to be a meta.xml file in $jobdir. Is that the correct directory?"}
my $mf = "$jobdir/meta.xml";

# Metadata of the original job, loaded from its meta.xml.
my $meta = GenomeMeta->new(undef, $mf);

# find the fasta file from the meta
my $faf;
eval {$faf=$meta->get_metadata("preprocess.fasta_file")};
if ($@) {die "There was an error getting the preprocess.fasta_file from $mf: $@"}
my $qaf=$meta->get_metadata("preprocess.qual_file");

# Return the contents of one of the small marker files in the job directory
# (GENOME, PROJECT, USER) without its final newline. The file is read
# directly rather than through `cat`, so the directory name given on the
# command line is never interpreted by a shell. A file that can not be read
# gives a warning and an empty string.
my $read_job_file = sub {
	my ($name) = @_;
	my $path = "$jobdir/$name";
	my $fh;
	unless (open($fh, '<', $path)) {
		warn "Could not read $path: $!\n";
		return '';
	}
	my $content = do { local $/; <$fh> };
	close($fh);
	$content = '' unless defined $content;
	chomp($content);   # done after $/ is restored, otherwise chomp removes nothing
	return $content;
};

my $genome = $read_job_file->('GENOME');
my $proj   = $read_job_file->('PROJECT');
# -u overrides the owner recorded in the original job
my $user   = $opts{u} || $read_job_file->('USER'); chomp($user);
my $public = (-e "$jobdir/PUBLIC") ? 1 : 0;

# Default to removing duplicates only when the old job has no setting at all;
# an explicit 0 in the old job must survive the upgrade.
# NOTE(review): the new job stores this as 'options.remove_duplicates' -
# confirm that the old jobs really use the bare key 'remove_duplicates'.
my $dups   = $meta->get_metadata('remove_duplicates');
$dups = 1 unless (defined $dups && length $dups);
my $desc   = $meta->get_metadata("project.description");

# Description of the new job, in the structure that is handed to
# Job48->create_new_job below.
my $job = {
	'genome'      => $genome,
	'project'     => $proj,
	'user'        => $user,
	'taxonomy'    => '',
	'metagenome'  => 1,
	'meta' => {
		'source_file'    => $faf,
		'project.description' => $desc,
		'options.remove_duplicates' => $dups,
		'options.public' => $public,
	},
};


# Copy the optional sample information across. Line breaks are removed from
# the values because they were turning up as blank lines in meta.xml, and
# values that are empty after trimming are not copied at all.
for my $opt (qw(altitude longitude latitude time habitat))
{
	my $val = $meta->get_metadata("optional_info.$opt");
	next unless defined $val;   # the old job never recorded this field

	$val =~ s/[\r\n]//g;
	$val =~ s/^\s+//;
	$val =~ s/\s+$//;

	if ($val =~ /\S/) {
		$job->{meta}->{"optional_info.$opt"} = $val;
	}
}
$job->{meta}->{source_fasta} = $faf;
$job->{meta}->{source_qual} = $qaf if (defined $qaf);

# create the job
# from here, all things like job id, jobdir, and meta have new in front of them if they refer to the new job version

my ($newjobid, $msg) = Job48->create_new_job($job);

# Report the message first so that it is still visible when we give up below.
if ($msg) {
	print STDERR "There was a warning: $msg while trying to create the job\n";
}

# Everything after this point needs the new job id (its directory, its
# meta.xml, the cluster submission), so there is nothing useful left to do
# when the job could not be created.
unless ($newjobid) {
	print STDERR "The job was not created but this is it's info: ", Dumper($job), "\n";
	die "Can not continue without a new job id\n";
}
print "The job was created with job id $newjobid\n";

## END COPY PART - the above section was imported from copy_metagenome

my $newjobdir=$FIG_Config::mgrast_jobs."/".$newjobid;

# mark it as DELETED so we don't work on it automatically
# The marker file is renamed directly instead of shelling out to mv, and a
# failure is reported because it means the automatic job checker may still
# pick this job up.
rename("$newjobdir/ACTIVE", "$newjobdir/DELETE")
	or warn "Could not rename $newjobdir/ACTIVE to $newjobdir/DELETE: $!\n";

# Set the meta flag that says where we came from

my $nmf = "$newjobdir/meta.xml";
my $newmeta = GenomeMeta->new(undef, $nmf);

# NOTE(review): the rest of this script uses get_metadata on GenomeMeta
# objects - confirm that 'set' (rather than 'set_metadata') is a method of
# GenomeMeta.
$newmeta->set("upgrade_source", $jobid);
# The value comes from the ORIGINAL job's metadata. The hash handed to
# create_new_job never holds this key, so reading it from there would always
# store undef.
$newmeta->set("original_preprocess.count_proc.file", $meta->get_metadata('preprocess.count_proc.file'));

# Hand the new job to the preprocess stage. The stage object comes from
# ClusterStage->new; a plain code reference is called directly, any other
# reference has its process method called, and a non-reference is reported
# with a warning. Anything that dies inside the stage is caught and printed
# so the script still finishes normally.

my $name = 'preprocess';
my $processor = ClusterStage->new('mg_preprocess', sge_flag => "-l mg_preprocess");

eval {
	my $kind = ref($processor);
	my @stage_args = ($name, $newjobid, $newjobdir, $newmeta, $sge);

	if ($kind eq 'CODE') {
		$processor->(@stage_args);
	}
	elsif ($kind) {
		$processor->process(@stage_args);
	}
	else {
		warn "Unknown processor " . Dumper($processor);
	}
};
print "Error processing job $newjobid\n$@\n" if ($@);




MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3