[Bio] / FigKernelScripts / pg_pipeline.pl Repository:
ViewVC logotype

View of /FigKernelScripts/pg_pipeline.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.51 - (download) (as text) (annotate)
Thu Aug 1 17:15:51 2013 UTC (6 years, 3 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.50: +1 -1 lines
readdble.tree is in Analysis

use RASTserver;
use strict;
use Data::Dumper;
use File::Basename;
use Getopt::Long;
use SeedEnv;
use PG;

my $usage = "usage: pg_pipeline -u User -p Password -g genetic-code [-t taxon] -d DataDir\n";
$ENV{PATH} = "/vol/kbase/deployment/bin:" . $ENV{PATH};

my($username,$password);
my $genetic_code = 11;
my $dataD;
my $taxon;

my $rc  = GetOptions('d=s' => \$dataD,
		     'g=i' => \$genetic_code,
		     't=i' => \$taxon,
		     'u=s' => \$username,
		     'p=s' => \$password);

if ((! $rc) || 
    (! -d $dataD))
{ 
    print STDERR $usage; exit ;
}

mkdir("$dataD/Analysis",0777);

my %check;
foreach my $_ (`cat $dataD/pubseed.seed $dataD/anno.seed`)
{
    ($_ =~ /\t(\d+\.\d+)/) || die "BAD genome: $_";
    if ($check{$1})          { die "$1 occurs multiple times" }
    $check{$1} = 1;
}

if (-s "$dataD/genomes")
{
    if (! -s "$dataD/genomes.with.job")
    {
	my $taxon_arg = $taxon ? "-t $taxon" : '';
	&SeedUtils::run("pg_submit_to_RAST -u $username -p $password -d $dataD $taxon_arg");
	my @jobs = map { ($_ =~ /\t(\d+)$/) ? $1 : () } `cat $dataD/genomes.with.job`;
	my $done = 0;
	while (! $done)
	{
	    my $sofar = 0;
	    foreach my $job (@jobs)
	    {
		if (-e "/vol/rast-prod/jobs/$job/DONE")
		{
		    $sofar++;
		}
	    }
	    $done = ($sofar == @jobs);
	    if (! $done)
	    {
		system "sleep 300";
	    }
	}
    }
    if (! -s "$dataD/genomes.with.job.and.genomeID")
    {
	&SeedUtils::run("pg_add_genomes -d $dataD");
    }
}
else
{
    system("touch", "$dataD/genomes.with.job");
    system("touch", "$dataD/genomes.with.job.and.genomeID");
}     

#
# Always run this - this lets us copy a partial directory in from a prior run
# and reuse the results computed there.
#
#if (! -d "$dataD/Corr")
{
    &SeedUtils::run("pg_compute_corr -d $dataD");
}

if(0 && ! -d "$dataD/Genomes")
{
    mkdir("$dataD/Genomes",0777);
    foreach $_ (`cut -f 3,4 $dataD/genomes.with.job.and.genomeID`)
    {
	if ($_ =~ /^(\d+)\t(\d+\.\d+)$/)
	{
	    my $genome  = $2;
	    my $job     = $1;
	    my $genomeD = "/vol/rast-prod/jobs/$job/rp/$genome";
	    mkdir("$dataD/Genomes/$genome",0777);
	    system "cp -r $genomeD/Features $genomeD/proposed* $genomeD/[GT]* $genomeD/annotations $genomeD/contigs $dataD/Genomes/$genome";
	}
    }
}


if (! -d "$dataD/Repeats")
{
    mkdir("$dataD/Repeats",0777) || die "could not make $dataD/Repeats";
    my $pg = new PG($dataD);

    foreach my $genome_dir ($pg->genome_dirs)
    {
	my($name,$id_or_dir,$rast_job,$rast_genome) = split(/\t/,$_);

	my $contigs = "$genome_dir/contigs";
	my $genome = basename($genome_dir);
	my $out = "$dataD/Repeats/$genome";
	&SeedUtils::run("svr_big_repeats -i 90 -f $contigs > $out");
     }
    &SeedUtils::run("pg_get_pegs_in_repeats -d $dataD");
}

if ((-s "$dataD/pegs.in.repeats") && (! -s "$dataD/possibly.alien.pegs"))
{
    &SeedUtils::run("pg_estimate_alien_pegs -d $dataD");
    &SeedUtils::run("pg_form_mobile_element_families -d $dataD");
}

if (! -s "$dataD/protein.families")
{
    &SeedUtils::run("pg_build_protein_families -d $dataD");
}

if (! -s "$dataD/inconsistent.core.families")
{
    &SeedUtils::run("pg_inconsistent_families -d $dataD < $dataD/core.families > $dataD/inconsistent.core.families 2> $dataD/inconsistent.core.families.anno.seed");
    &SeedUtils::run("pg_inconsistent_families -d $dataD < $dataD/non.core.families > $dataD/inconsistent.non.core.families  2> $dataD/inconsistent.non.core.families.anno.seed");
    &SeedUtils::run("tabs2rel < $dataD/protein.families | pg_function_of -d $dataD > $dataD/families.with.function");
    &SeedUtils::run("tabs2rel < $dataD/core.families | pg_function_of -d $dataD > $dataD/core.families.with.function");
    &SeedUtils::run("tabs2rel < $dataD/non.core.families | pg_function_of -d $dataD > $dataD/non.core.families.with.function");
    &SeedUtils::run("tabs2rel < $dataD/inconsistent.core.families | pg_function_of -d $dataD > $dataD/inconsistent.core.families.with.function");
    &SeedUtils::run("tabs2rel < $dataD/inconsistent.non.core.families | pg_function_of -d $dataD > $dataD/inconsistent.non.core.families.with.function");

    unlink("$dataD/events");  # force recomputation of adjacency and events
}

if (! -d "$dataD/FastaForPhylogeny")
{
    &SeedUtils::run("pg_build_fasta_for_phylogeny -d $dataD");
    &SeedUtils::run("pg_build_newick_tree -d $dataD");
}

if (! -s "$dataD/Analysis/readable.tree")
{
    my $pg = new PG($dataD);
    my @gdata = $pg->genome_data();
    my @tmp = map { [$_->[1], "$_->[1]: " . $_->[0] ] } @gdata;
    open(LABELS,">tmp$$.labels") || die "could not open tmp$$.labels";
    foreach $_ (@tmp)
    {
	print LABELS join("\t",@$_),"\n";
    }
    close(LABELS);
    &SeedUtils::run("svr_reroot_tree -m < $dataD/estimated.phylogeny.nwk | label_all_nodes > $dataD/labeled.tree");
    &SeedUtils::run("sketch_tree -a -l tmp$$.labels  < $dataD/labeled.tree > $dataD/Analysis/readable.tree");
    unlink("tmp$$.labels");
}

if (! -s "$dataD/events")
{
    &SeedUtils::run("pg_adjacency_of_core -d $dataD");
    &SeedUtils::run("pg_events -d $dataD");
    &SeedUtils::run("pg_compute_traits -d $dataD");
}

if (! -s "$dataD/role.inconsistencies")
{
    &SeedUtils::run("pg_roles_in_some_but_not_X -d $dataD");
}
if (! -s "$dataD/proposed.assignments.for.pubseed")
{
    &SeedUtils::run("pg_generate_possible_assignments -d $dataD");
}


    my @sets;
if (-s "$dataD/subsets.for.reconstructions")
{
    open(SS,"<$dataD/subsets.for.reconstructions") || die "could not open $dataD/subsets.for.reconstructions";
    $/ = "\n\/\/\n";
    @sets = <SS>;
    $/ = "\n";
    close(SS);
}
if ((@sets == 0) || ($sets[0] !~ /^All/))
{
    my @genomes = `cut -f1 $dataD/MODEL-ID`;
    @sets = ("All\n" . join("",@genomes) . "//\n",@sets);
    open(SS,">$dataD/subsets.for.reconstructions") || die "could not open $dataD/subsets.for.reconstructions";
    print SS join("",@sets);
    close(SS);
}

if (! -s "$dataD/all.models.roles.reactions.pegs")
{
    my $parmF = "$dataD/parm.for.getting.reactions.roles.pegs";
    unlink $parmF;
    open(PARMS,">$parmF") 
	|| die "could not open $dataD/parm.for.getting.reactions.roles.pegs";
    foreach $_ (`cat $dataD/MODEL-ID`)
    {
	if ($_ =~ /^(\S+)\s+(\S+)/)
	{
	    my($g,$model_id) = ($1,$2);
	    my @tmp = `cat $dataD/Workspace`;   # if the workspace is altered, the model ID files must change
	    my $ws = $tmp[0];
	    chop $ws;
	    print PARMS join("\t",($g,$model_id,$ws)),"\n";
	}
    }
    close(PARMS);
    &SeedUtils::run("pg_janaka_get_role_reactions_pegs < $parmF >> $dataD/all.models.roles.reactions.pegs");
}
if (! -s "$dataD/MODEL-ID")
{
    &SeedUtils::run("pg_make_initial_models -d $dataD");
}

if (! -d "$dataD/ReactionSets")
{
    &SeedUtils::run("pg_extract_reaction_sets -d $dataD");

    &SeedUtils::run("pg_intersection -d $dataD");
    &SeedUtils::run("pg_add_role_peg -d $dataD");
}

if (0) # (! -d "$dataD/GapfillReactions")
{
    mkdir("$dataD/GapfillReactions",0777);
    mkdir("$dataD/Reactions",0777);
    &SeedUtils::run("pg_run_gapfills -d $dataD");
    &SeedUtils::run("pg_check_gapfills -d $dataD");
    &SeedUtils::run("pg_get_models -d $dataD");
    &SeedUtils::run("pg_get_reaction_keys -d $dataD");
    &SeedUtils::run("pg_get_gapfill_reactions -d $dataD > $dataD/genome.gapfilled-reaction.complex.role 2> $dataD/genome.gapfilled-reaction");
    &SeedUtils::run("pg_get_reactions -d $dataD");
    &SeedUtils::run("pg_get_reaction_role -d $dataD > $dataD/genome.reaction.complex.role");
}

if (! -s "$dataD/md5.fams")
{
    &SeedUtils::run("pg_make_md5_families -d $dataD");
    &SeedUtils::run("pg_get_consistency_before_after -d $dataD");
}

if (! -s "$dataD/stats")
{
    &SeedUtils::run("pg_get_stats -d $dataD");
}

if (! -d "$dataD/CodonUsage")
{
    &SeedUtils::run("pg_get_codon_usage_stats -d $dataD");
}

if (! -s "$dataD/reaction.properties")
{
    &SeedUtils::run("pg_make_reaction_properties -d $dataD < $dataD/all.models.roles.reactions.pegs > $dataD/reaction.properties");
    &SeedUtils::run("place_properties_on_tree -t $dataD/labeled.tree -p $dataD/reaction.properties -e $dataD/extended.reaction.properties");
    &SeedUtils::run("where_shifts_occurred -t $dataD/labeled.tree -e $dataD/extended.reaction.properties > $dataD/Analysis/where.shifts.occurred");
    my @shifts = map { chop; [split(/\t/,$_)] } `cat $dataD/Analysis/where.shifts.occurred`;
    my %reaction_to_equation;
    foreach $_ (`cat $dataD/all.models.roles.reactions.pegs`)
    {
	if ($_ =~ /^\S+\t(rxn\S+)\t[^\t]*\t[^\t]*\t(\S[^\t]*\S)/)
	{
	    $reaction_to_equation{$1} = $2;
	}
    }
    foreach my $tuple (@shifts)
    {
	push(@$tuple,$reaction_to_equation{$tuple->[2]});
    }
    rename("$dataD/Analysis/where.shifts.occurred","$dataD/Analysis/where.shifts.occurred~");
    open(EXT,">$dataD/Analysis/where.shifts.occurred") || die "could not open $dataD/Analysis/where.shifts.occurred";
    foreach my $tuple (@shifts)
    {
	print EXT join("\t",(@$tuple)),"\n";
    }
    close(EXT);
}

system "chmod -R 777 $dataD";

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3