[Bio] / FigKernelScripts / insert_prot_into_tree.pl Repository:
ViewVC logotype

View of /FigKernelScripts/insert_prot_into_tree.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.14 - (download) (as text) (annotate)
Wed Jan 31 21:30:36 2007 UTC (12 years, 9 months ago) by golsen
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.13: +1 -1 lines
Continuing the update of tree packages and programs

########################################################################

use InsertIntoTree;
use tree_utilities;
use Carp;
use tree_neighborhood;
use FIG;

my $usage = "usage: insert_prot_into_tree [-a alpha] [-n NeighSz] Ali Tree Id NJtree [Weights] > new_tree";


my $size_rep = 40;
my $alpha = undef;
while ($ARGV[0] =~ /^-/)
{
    $_ = shift @ARGV;
    if ($_ =~ s/^-a//)    { $alpha = ($_ || shift @ARGV) }
    elsif ($_ =~ s/^-n//) { $size_rep = ($_ || shift @ARGV) }
    else                  { die "Bad Flag: $_" }
}

($ali = shift @ARGV) 
    || die "bad alignment: $usage";

(($tree_file = shift @ARGV) && (-s $tree_file))
    || die "missing tree: $usage";

($id_to_ins = shift @ARGV)
    || die "missing id to insert: $usage";

($nj_treeF = shift @ARGV)
    || die "missing NJtree: $usage";

$nj_tree = &tree_utilities::parse_newick_tree(join("",`cat $nj_treeF`));

($tree = &uproot(&parse_newick_tree(join("",`cat \"$tree_file\"`))))
    || die "could not parse the tree";

&tree_utilities::label_all_nodes($tree);
$big_indexes = &tree_utilities::tree_index_tables($tree);

##############  get the tree ##########
$tips    = &tree_utilities::tips_of_tree($tree);
@in_tree = ();
foreach $x (@$tips)
{
    $in_tree{$x} = 1;
}

if ($in_tree{$id_to_ins})
{
    die "$id_to_ins is already in the tree";
}
############### load relevant sequences from alignment ###########

open(ALI,"<$ali") || die "could not open $ali";
$_ = <ALI>;
while ($_ && ($_ =~ /^>(\S+)/))
{
    $id = $1;
	
    @lines = ();
    while (defined($_ = <ALI>) && ($_ !~ /^>/))
    {
	chomp;
	push(@lines,$_);
    }

    if ($in_tree{$id} || ($id eq $id_to_ins) ||
	($in_tree{"\'$id\'"} && ($id = "\'$id\'")))
    {
	$seq = join("",@lines);
	$seq =~ tr/a-z/A-Z/;
	$seq =~ s/[~.]/\?/g;
	$x   = [$seq];               # this is a hack, I want the assoc array
	$seq_of{$id} = \($x->[0]);   # to contain pointers to the seqs (not the seqs)
    }
}
close(ALI);

$bad = 0;
foreach $id (@$tips)
{
    if ((! $seq_of{$id}) && (! $seq_of{"\'$id\'"}))
    {
	$bad = 1;
	warn "The tree contains $id, but it is not in the alignment\n";
    }
}
(! $bad) || die "Could not insert - alignment is missing sequences";

$subtree = &tree_neighborhood::focused_neighborhood($tree,$id_to_ins,$nj_tree,$size_rep);
$x = &tree_utilities::display_tree($subtree);
if ($ENV{'VERBOSE'}) { print STDERR "Initial Representative Tree:\n\n$x\n"; }

##################################################
@seen = ();

while (! &seen($subtree))
{
    ($where,$len_to_add) = &sequence_ins_point($subtree,$id_to_ins,\%seq_of);
    my($node1,$node2,$frac) = @$where;
    my $node1_label = $node1->[0];
    my $node2_label = $node2->[0];

    if ($ENV{'VERBOSE'}) {    print STDERR "node1=@{$where->[0]} node2=@{$where->[1]} frac=$where->[2] len_to_add=$len_to_add\n"; }

    my $big_node1 = &tree_utilities::label_to_node($big_indexes,$node1_label);
    my $big_node2 = &tree_utilities::label_to_node($big_indexes,$node2_label);
    $subtree = &focused_neighborhood_of_tree_point($tree,
						   $big_indexes,
						   [$big_node1,$big_node2,$frac],
						   $size_rep);
    $x       = &tree_utilities::display_tree($subtree,{});
    if ($ENV{'VERBOSE'}) 
    {    
	print STDERR "new neighborhood\n";
	print STDERR $x;
    }
}

my $updated_tree = &add_branch($big_indexes,$where,$len_to_add,$id_to_ins);
&tree_utilities::unlabel_internal_nodes($updated_tree);

$new_tree = &tree_utilities::to_newick($updated_tree);
print "$new_tree\n";
if ($ENV{'VERBOSE'}) {    print STDERR "successfully inserted $id_to_ins\n"; }

sub seen {
    my($tree) = @_;
    my($tips,@sorted,$key);

    $tips = &tips_of_tree($tree);
    @sorted = sort @$tips;
    $key = join(" ",@sorted);

    if (! $seen{$key})
    {
	$seen{$key} = 1;
	return 0;
    }
    return 1;
}

sub add_branch {
    my($tabP,$where,$len_to_add,$id) = @_;
    my($nd1,$nd2,$node1,$node2,$fract,$node,$desc);

    ($node1,$node2,$fract) = @$where;
    $nd1 = &locate_node($node1,$tabP);
    $nd2 = &locate_node($node2,$tabP);
    $tree = &tree_utilities::root_tree_between_nodes($nd1,$nd2,$fract,$tabP);
    $node = [$id,$len_to_add,[$tree]];
    $desc = $tree->[2];
    push(@$desc,$node);
    return $tree;
}


sub sequence_ins_point
{
    my( $tree, $id, $seq_of, $alpha ) = @_;

    my( $id_seqs, $tips, $id1, $seq1 );
    my( $x );
    my($indexes,$at_node,$sz1,$sz2,$len_to_add,$where);
    my($tips1,$tips2,$node1,$node2,$d1,$d2,$frac);

    if ($ENV{'VERBOSE'}) { print STDERR "entering sequence_ins+point\n"; }

    $id_seqs = [];
    $tips = &tree_utilities::tips_of_tree($tree);
    foreach $id1 ( @$tips, $id )
    {
	$seq1 = $seq_of->{$id1};
	$seq1 || confess "could not locate sequence for $id1";
	push( @$id_seqs, [ $id1, $$seq1 ] );
    }

    my %options = ( align => $id_seqs, 
                    tree  => $tree,
                    id    => $id,
                    alpha => $alpha
                  );

    $options{ alpha } = $alpha if $alpha;

    my $new_tree = &InsertIntoTree::insert_prot_into_tree( \%options );

    $indexes = &tree_utilities::tree_index_tables( $new_tree );
    $at_node = &tree_utilities::root_tree_at_node( $indexes, $id );

    $sz1 = &tree_length( $tree );
    $sz2 = &tree_length( $at_node->[2]->[1] ) - $at_node->[2]->[1]->[1];
    if (($sz1 >= 0) && ($sz2 > 0))
    {
	$len_to_add = $at_node->[2]->[1]->[1] * ($sz1 / $sz2);
    }
    else
    {
	$len_to_add = 0;
    }

    if ($ENV{'VERBOSE'}) { print STDERR "sz1=$sz1 sz2=$sz2 len_to_add=$len_to_add\n"; }
    $descendant1 = $at_node->[2]->[1]->[2]->[1];
    $descendant2 = $at_node->[2]->[1]->[2]->[2];
    $node1 = &rooted_identify($descendant1);
    $node2 = &rooted_identify($descendant2);
    if (@$node1 == 2) { push(@$node1,$node2->[0]); }
    if (@$node2 == 2) { push(@$node2,$node1->[0]); }

    $d1 = $descendant1->[1];
    $d2 = $descendant2->[1];
    if (($d1  + $d2 ) > 0)
    {
	$frac = ($d1 / ($d1+$d2));
    }
    else
    {
	$frac = 0.5;
    }
    if ($ENV{'VERBOSE'}) { print STDERR "node1=@$node1 node2=@$node2 frac=$frac\n"; }
    $where = [$node1,$node2,$frac];
#
# Set $where
#
    if ($ENV{'VERBOSE'}) { print STDERR "entering sequence_ins+point\n"; }
    return ($where,$len_to_add);
}

sub tree_length {
    my($tree) = @_;
    my($desc,$ln,$i);

    $desc = $tree->[2];
    for ($ln=0,$i=1; ($i <= $#{$desc}); $i++)
    {
	$ln += &tree_length($desc->[$i]) + $desc->[$i]->[1];
    }
    return $ln;
}

sub rooted_identify {
    my($tree) = @_;
    my($cc);

    $cc = $tree->[2];
    if (@$cc == 1) { return [$tree->[0]]; }
    if (@$cc == 2) { return ""; }
    return [&node_of_tree($cc->[1]),&node_of_tree($cc->[2])];
}

sub node_of_tree {
    my($tree) = @_;

    while ($#{$tree->[2]} > 0)
    {
	$tree = $tree->[2]->[1];
    }
    return $tree->[0];
}

sub focused_neighborhood_of_tree_point {
    my($tree,$big_indexes,$where,$size_rep) = @_;

    if ($ENV{'VERBOSE'}) { print STDERR "entering focused_neighborhood_of_tree_point\n"; }
    my($node1,$node2,$frac) = @$where;
    my $tree1   = &tree_utilities::root_tree_between_nodes($node1,$node2,$frac,$big_indexes);
    my @tips  = &tree_neighborhood::n_tips_for_neighborhood($tree1,$size_rep);
    my %hash2 = map { $_ => 1 } @tips;
    if ($ENV{'VERBOSE'}) { print STDERR "leaving focused_neighborhood_of_tree_point\n"; }
    return &subtree($tree,\%hash2);
}

							   

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3