[Bio] / FigKernelScripts / bring_tree_up_to_ali.pl Repository:
ViewVC logotype

View of /FigKernelScripts/bring_tree_up_to_ali.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Mon Jan 29 18:59:08 2007 UTC (12 years, 9 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.4: +9 -2 lines
debugged version of bring_tree_up_to_ali

########################################################################

use gjoseqlib qw( read_fasta );
use tree_utilities;
use proml qw( proml );
use protdist_neighbor qw( protdist_neighbor );

my $usage = "usage: bring_tree_up_to_ali [-c CheckpointFile] [-a Alpha] [-n NeighSz] [-t TmpDir] Ali Tree > NewTree";

my $neigh_sz    = 40;
my $check_point = undef;
my $alpha       = undef;
my $tmpdir      = $FIG_Config::temp || '/tmp';  # FIG environment?
while ($ARGV[0] =~ /^-/)
{
    $_ = shift @ARGV;
    if    ($_ =~ s/^-a//) { $alpha       = ($_ || shift @ARGV) }
    elsif ($_ =~ s/^-c//) { $check_point = ($_ || shift @ARGV) }
    elsif ($_ =~ s/^-n//) { $neigh_sz    = ($_ || shift @ARGV) }
    elsif ($_ =~ s/^-t//) { $tmpdir      = ($_ || shift @ARGV) }
    else                  { die "Error: invalid flag $flag\n$usage" }
}

my( $aliF, $treeF );
(
 ($aliF  = shift @ARGV) &&
 ($treeF = shift @ARGV)
)
    || die $usage;

#  Read the starting tree:

my( $tree, $tips, %in_tree, %in_ali );

($tree = &uproot(&parse_newick_tree(join("",`cat \"$treeF\"`))))
    || die "could not read and parse the tree";

$tips    = &tree_utilities::tips_of_tree($tree);
%in_tree = map { ( $_ => 1 ) } @$tips;

#  Read the alignment (each entry is [ $id, $definition, $seq ]):

my @align = read_fasta( $aliF );
%in_ali = map { ( $_->[0] => 1 ) } @align;

my @bad = grep { ! $in_ali{$_} } @$tips;
if ( @bad )
{
    print STDERR "ERROR: the following ids are in the tree, but not the alignment:\n\t",
                 join( ", ", sort @bad ),"\n";
    die "BAD IDs in the tree";
}

my @to_add = grep { ! $in_tree{$_} } keys %in_ali;
if (@to_add == 0) 
{ 
    print STDERR "Nothing was added\n";
    system "cat $treeF";
    exit;
}
my %to_add = map { $_ => 1 } @to_add;

&run( "cp $treeF $tmpdir/last.tree.$$" );

#  Create the neighbor-joining tree to guide addition sites:

my %nj_tree_opt = ( tree_format  => 'overbeek',
                    tmp          => "$tmpdir",
                  );
$nj_tree_opt{ alpha } = $alpha if $alpha;
my $nj_tree = protdist_neighbor::protdist_neighbor( \@align, \%nj_tree_opt );
open( NJ_TREE, ">$tmpdir/nj.outtree.$$" ) || die "Failed to open $tmpdir/nj.outtree.$$ for writing";
print NJ_TREE &tree_utilities::to_newick( $nj_tree ), "\n";
close( NJ_TREE );

foreach $id ( &order_addition( \%to_add, \@align ) )
{
    &run("insert_prot_into_tree -n $neigh_sz '$aliF' '$tmpdir/last.tree.$$' '$id' '$tmpdir/nj.outtree.$$' > '$tmpdir/new.tree.$$'");
    if ($check_point) { system "cat $tmpdir/new.tree.$$ >> $check_point"; }

    &run("cp $tmpdir/new.tree.$$ $tmpdir/last.tree.$$");
}

&run( "cat $tmpdir/new.tree.$$" );
unlink("$tmpdir/new.tree.$$");
unlink("$tmpdir/last.tree.$$");
unlink("$tmpdir/nj.outtree.$$");

exit;


sub order_addition
{
    my( $to_add, $align ) = @_;

    return map  { $_->[0] }                                # id
           sort { $b->[1] <=> $a->[1] }                    # long to short
           map  { [ $_->[0], &informative( $_->[-1] ) ] }  # [id, sequence length]
           grep { $to_add->{ $_->[0] } }                   # only those to add
           @$align;
}


sub run { ( system( $_[0] ) == 0 ) || confess( "FAILED: $cmd" ) }


sub informative { $_[0] =~ tr/ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy// }


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3