[Bio] / FigKernelScripts / best_tree_using_proml_on_cluster.pl Repository:
ViewVC logotype

View of /FigKernelScripts/best_tree_using_proml_on_cluster.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Sat Apr 28 14:47:53 2007 UTC (12 years, 9 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
add best_tree_using_proml_on_cluster

########################################################################
# -*- perl -*-
#
# Copyright (c) 2003-2007 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#
########################################################################


$usage = "usage: best_tree_using_proml_on_cluster [-a alpha] Alignment NodeF NodeL MustComplete > Tree

This command is used to generate trees by running proml multiple times
on the cluster, keeping the tree with the best score.  You run the
command using something like

	best_tree_using_proml_on_cluster my.alignment.fasta 1 55 50 > best.tree

where the first argument gives the file containing the alignment,
the second and third specify the range of nodes to use (in this
example, nodes 1 through 55), and the last parameter indicates how
many of the nodes must complete.  Occasionally, the cluster does have
bad nodes, so I recommend making the last parameter 1 or 2 less than
the number of nodes you use.

The best likelihood will be written to stderr, along with the output
of a killall used to kill off any remaining processes after enough
complete. 
";

my $alpha = undef;
while ($ARGV[0] =~ /^-/)
{
    $_ = shift @ARGV;
    if ($_ =~ s/^-a//) { $alpha = ($_ || shift @ARGV) }
    else               { die "Bad Flag: $_" }
}

(
 ($aliF          = shift @ARGV) &&
 ($nodeF         = shift @ARGV) &&
 ($nodeL         = shift @ARGV) &&
 ($must_complete = shift @ARGV) && ($must_complete > 0) && ($must_complete <= (($nodeL - $nodeF) + 1))
)
    || die $usage;

$_ = join("",map { chomp; $_ } `pwd`);
$tmpD = "$_/best_tree_using_proml_on_cluster.$$";
mkdir($tmpD,0777)  || die "could not make $tmpD";

print STDERR "using temporary directory = $tmpD\n";

mkdir("$tmpD/Output",0777) || die "could not make $tmpD/Output";
mkdir("$tmpD/Stderr",0777) || die "could not make $tmpD/Stderr";

my $cmdF = "$tmpD/cmd";
open(CMD,">$cmdF") || die "aborted";

my $cmd = <<'CMD__FILE';
#!/usr/bin/perl

my $dir = "/tmp/proml.$$";
mkdir($dir,0777) || die "could not make $dir";

my $alpha = <<ALPHA>>;

open(INFILE,">$dir/infile") || die "could not generate infile in $dir";
print INFILE <<END__ALI;
<<ALI>>
END__ALI
close(INFILE);

open(PARMS,">$dir/parms") || die "could not open $dir/parms";
$val = (int(rand() * 500000000) * 2) + 1;
print PARMS "J\n$val\n1\n";
if    ($alpha) { print PARMS "R\nY\n",1 / sqrt($alpha),"\n5\n"; }
else           { print PARMS "Y\n"; }
close(PARMS);
system "cd $dir; proml < parms > /dev/null; cat outfile; echo '//'; cat outtree; echo '///'";
system "/bin/rm -r $dir";
CMD__FILE

if ($alpha)
{
    $cmd =~ s/<<ALPHA>>/$alpha/;
}
else
{
    $cmd =~ s/<<ALPHA>>/undef/;
}
open(ALI,"fasta_to_phylip < $aliF |") || die "could not open $ali";
my $ali = join("",<ALI>);
$cmd =~ s/<<ALI>>/$ali/;
print CMD $cmd;
close(CMD);
system "chmod +x $cmdF";

for ($i = $nodeF; ($i <= $nodeL); $i++)
{
    system "ssh -n bio-ppc-$i '$tmpD/cmd < /dev/null > $tmpD/Output/$i 2> $tmpD/Stderr/$i' &";
}

$done = 0;
while (! $done)
{
    ($lk,$tree) = &test_for_complete("$tmpD/Output",$must_complete);
    if (defined($tree))
    {
	print "$tree\n";
	print STDERR "Ln Likelihood = $lk\n";
	system "pdsh -w 'bio-ppc-[$nodeF-$nodeL]' 'killall $tmpD/cmd'";
	$done = 1;
    }
    if (! $tree) { sleep 30 }
}
system "/bin/rm -r $tmpD";

sub test_for_complete {
    my($dir,$n) = @_;

    my($best_sofar,$tree);
    if (opendir(DIR,$dir))
    {
	my @out = grep { $_ =~ /^\d+$/ } readdir(DIR);
	closedir(DIR);
	if ($n <= @out)
	{
	    
	    foreach my $file (@out)
	    {
		my $out1 = join("",`cat $dir/$file`);
		if (($out1 =~ /\nLn Likelihood =\s*(\S+).*\n\/\/\n(.*)\/\/\//s) &&
		    ((! defined($best_sofar)) || ($best_sofar > $1)))
		{
		    $best_sofar = $1;
		    $tree       = $2;
		}
	    }
	}
    }
    return ($best_sofar,$tree);
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3