[Bio] / FigKernelScripts / pg_compute_traits.pl Repository:
ViewVC logotype

View of /FigKernelScripts/pg_compute_traits.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Sat Jun 1 12:44:54 2013 UTC (6 years, 5 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.2: +2 -1 lines
fix to pass2

########################################################################
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

use strict;
use Data::Dumper;
use Carp;
use Getopt::Long;
use SeedEnv;

my $usage = "usage: pg_compute_traits -d Data\n";
my $dataD;
my $rc  = GetOptions('d=s' => \$dataD,);

if ((! $rc) || (! -d $dataD)) { print STDERR $usage; exit }

use tree_utilities;
(-s "$dataD/estimated.phylogeny.nwk") || die "$dataD/estimated.phylogeny.nwk is empty";

my $tree;
($tree = &parse_newick_tree(join("",`cat $dataD/estimated.phylogeny.nwk`)))
    || die "could not parse the tree";

&take_out_bootstrap($tree);
my $rooted = &root_at_midpoint($tree);
&label_all_nodes($rooted);
&write_newick($rooted,"$dataD/rooted.labeled.tree.nwk");

open(TREE,"| sketch_tree > $dataD/rooted.labeled.tree") || die "could not open $dataD/rooted.labele.tree";
print TREE &to_newick($rooted);
close(TREE);

my ($traits,$node_to_traits) = &place_events_on_leaves($dataD);
foreach my $trait (@$traits)
{
    &place_trait_on_tree($trait,$node_to_traits,$rooted);
}

open(OUT,">$dataD/traits.on.nodes") || die "could not open $dataD/traits.on.nodes";
foreach my $node (sort keys(%$node_to_traits))
{
    my $traitH = $node_to_traits->{$node};
    foreach my $trait (sort keys(%$traitH))
    {
	my $v = $node_to_traits->{$node}->{$trait};
	if (! $v) { die "HERE" }
	print OUT join("\t",($node,$trait,join(",",sort(@$v)))),"\n";
    }
}
close(OUT);

open(OUT,"| sort > $dataD/placed.events") || die "could not open $dataD/placed.events";
&placed_events($rooted,$node_to_traits,\*OUT);
close(OUT);

sub placed_events {
    my($tree,$node_to_traits,$fh) = @_;

    my $this_node = $tree->[0];
    my $this_node_traits = $node_to_traits->{$this_node};
    my @traits_for_this_node = grep { @{$this_node_traits->{$_}} == 1 } keys(%$this_node_traits);
    my $i;
    for ($i=1; ($i < @{$tree->[2]}); $i++)
    {
	&placed_events($tree->[2]->[$i],$node_to_traits,$fh);
	my $child = $tree->[2]->[$i]->[0];
	my $child_traits = $node_to_traits->{$child};
	foreach my $trait (@traits_for_this_node)
	{
	    my $child_val;
	    if (($child_val = $child_traits->{$trait}) && (@$child_val == 1))
	    {
		my $this_node_val = $this_node_traits->{$trait}->[0];
		if ($child_val->[0] ne $this_node_val)
		{
		    print $fh join("\t",($this_node,$child,$trait,$this_node_val,$child_val->[0])),"\n";
		}
	    }
	}
    }
}

sub place_trait_on_tree {
    my($trait,$node_to_trait,$tree) = @_;
    my $values = {};
    &pass1($trait,$tree,$node_to_trait,$values);
    &pass2($trait,$tree,$node_to_trait,$values);
    foreach my $node (keys(%$values))
    {
	if (@{$values->{$node}} > 0) 
	{
	    $node_to_trait->{$node}->{$trait} = $values->{$node};
	}
    }
}

sub pass1 {
    my($trait,$tree,$node_to_trait,$values) = @_;

    if (@{$tree->[2]} == 1)
    {
	my $v = $node_to_trait->{$tree->[0]}->{$trait};
	if (! $v) { $v = [] }
	$values->{$tree->[0]} = $v;
    }
    else
    {
	my $i;
	my @current_values;
	for ($i=1; ($i < @{$tree->[2]}); $i++)
	{
	    &pass1($trait,$tree->[2]->[$i],$node_to_trait,$values);
	    push(@current_values,$values->{$tree->[2]->[$i]->[0]});
	}
	$values->{$tree->[0]} = &iu(\@current_values);
    }
}

sub pass2 {
    my($trait,$tree,$node_to_trait,$values) = @_;

    if (@{$tree->[2]} == 1)
    {
	my $v = $node_to_trait->{$tree->[0]}->{$trait};
	if (! $v) { $v = [] }
	$values->{$tree->[0]} = $v;
    }
    else
    {
	my $i;
	for ($i=1; ($i < @{$tree->[2]}); $i++)
	{
	    my $overlap  = &i([$values->{$tree->[0]},$values->{$tree->[2]->[$i]->[0]}]);
	    if (@$overlap > 0)
	    {
		$values->{$tree->[2]->[$i]->[0]} = $overlap;
	    }
	    &pass2($trait,$tree->[2]->[$i],$node_to_trait,$values);
	}
    }
}

sub iu {
    my($values) = @_;

    my $x = &i($values);
    if (@$x > 0)
    {
	return $x;
    }
    return &u($values);
}

sub i {
    my($values) = @_;
    my %counts;
    foreach my $v (@$values)
    {
	if (! ref($v)) { confess('BAD') }
	foreach my $x (@$v)
	{
	    $counts{$x}++;
	}
    }
    my @tmp = grep { $counts{$_} == @$values } keys(%counts);
    return \@tmp;
}
	    
sub u {
    my($values) = @_;

    my %counts;
    foreach my $v (@$values)
    {
	foreach my $x (@$v)
	{
	    $counts{$x}++;
	}
    }
    return [keys(%counts)];
}

sub place_events_on_leaves {
    my($dataD) = @_;

    my $node_to_traits = {};
    my %traits;
    foreach my $tuple ( map { my($f1,$o2,$f2,$o1,$g) = split(/\t/,$_); [$f1,$o1,$f2,$o2,$g] } `cat $dataD/events`)
    {
	my($f1,$o1,$f2,$o2,$g) = @$tuple;
	my $trait = join(":",($f1,$o1));
	my $val   = join(":",($f2,$o2));
	$node_to_traits->{$g}->{$trait} = [$val];
	$traits{$trait} = 1;
    }
    my $traits = [sort keys(%traits)];
    return($traits,$node_to_traits);
}

sub take_out_bootstrap {
    my($tree) = @_;

    if (@{$tree->[2]} > 1)
    {
	$tree->[0] = "";
    }
    my $i;
    for ($i=1; ($i < @{$tree->[2]}); $i++)
    {
	&take_out_bootstrap($tree->[2]->[$i]);
    }
}

sub root_at_midpoint {
    my($tree) = @_;

    my $uprooted   = &uproot($tree);
    my $tree_index = &tree_index_tables($uprooted);
    my @furthest;
    my $i;
    for ($i=1; ($i < @{$tree->[2]}); $i++)
    {
	my($leaf,$dist) = &most_distant($tree->[2]->[$i]);
	push(@furthest,[$leaf,$dist]);
    }
    @furthest = sort { $b->[1] <=> $a->[1] } @furthest;
    my $rooted =  &root_tree_between_nodes($furthest[0]->[0],$furthest[1]->[0],0.5,$tree_index);
    return $rooted;
}

sub most_distant {
    my($node) = @_;

    if (@{$node->[2]} == 1)
    {
	return ($node,$node->[1]);
    }

    my $i;
    my $furthest;
    my $sofar = 0;
    
    for ($i=1; ($i < @{$node->[2]}); $i++)
    {
	my($furthest1,$dist1) = &most_distant($node->[2]->[$i]);
	if ((! $furthest) || ($dist1 > $sofar))
	{
	    $sofar = $dist1;
	    $furthest = $furthest1;
	}
    }
    return ($furthest,$sofar);
}

	

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3