[Bio] / FigKernelPackages / gjonewicklib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjonewicklib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.7, Fri Jan 12 23:45:29 2007 UTC revision 1.8, Sun Feb 11 18:25:48 2007 UTC
# Line 1  Line 1 
1  #  #
2  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2007 University of Chicago and Fellowship
3  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
4  #  #
5  # This file is part of the SEED Toolkit.  # This file is part of the SEED Toolkit.
# Line 109  Line 109 
109  #  set_newick_desc_list( $noderef, @desclist )  #  set_newick_desc_list( $noderef, @desclist )
110  #  set_newick_desc_i( $noderef1, $i, $noderef2)  #  set_newick_desc_i( $noderef1, $i, $noderef2)
111  #  #
112    #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid
113    #
114  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root
115  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root
116  #  $bool    = newick_is_tip_rooted( $noderef )  # 1 branch from root  #  $bool    = newick_is_tip_rooted( $noderef )  # 1 branch from root
# Line 128  Line 130 
130  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )
131  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )
132  #  #
133    #  Tree tip insertion point (tip is on branch of length x that
134    #  is inserted into branch connecting node1 and node2, a distance
135    #  x1 from node1 and x2 from node2):
136    #
137    #  [ $node1, $x1, $node2, $x2, $x ]
138    #           = newick_tip_insertion_point( $tree, $tip )
139    #
140    #  Standardized label for a node in terms of intersection of 3 lowest sorting
141    #  tips (sort is lower case):
142    #
143    #  @TipOrTips = std_node_name( $Tree, $Node )
144    #
145  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
146  #  Paths from root of tree:  #  Paths from root of tree:
147  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 157  Line 171 
171  #  $treecopy = copy_newick_tree( $tree )  #  $treecopy = copy_newick_tree( $tree )
172  #  #
173  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
174  #  The following modify the existing tree, and passibly any components of that  #  The following modify the existing tree, and possibly any components of that
175  #  tree that are reached by reference.  If the old version is still needed, copy  #  tree that are reached by reference.  If the old version is still needed, copy
176  #  before modifying.  #  before modifying.
177  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 176  Line 190 
190  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
191  #  $node      = newick_rescale_branches( $node, $factor )  #  $node      = newick_rescale_branches( $node, $factor )
192  #  #
193    #  Modify comments:
194    #
195    #  $node = newick_strip_comments( $node )
196    #
197  #  Modify rooting and/or order:  #  Modify rooting and/or order:
198  #  #
199  #  $nrmtree = normalize_newick_tree( $tree )  #  $nrmtree = normalize_newick_tree( $tree )
# Line 200  Line 218 
218  #  #
219  #  $newtree = collapse_zero_length_branches( $tree )  #  $newtree = collapse_zero_length_branches( $tree )
220  #  #
221    #  $node = newick_insert_at_node( $node, $subtree )
222    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
223    #
224  #===============================================================================  #===============================================================================
225  #  Tree reading and writing:  #  Tree reading and writing:
226  #===============================================================================  #===============================================================================
# Line 220  Line 241 
241  #===============================================================================  #===============================================================================
242    
243    
244    use Carp;
245    use Data::Dumper;
246    
247  require Exporter;  require Exporter;
248    
249  our @ISA = qw(Exporter);  our @ISA = qw(Exporter);
# Line 227  Line 251 
251          overbeek_to_gjonewick          overbeek_to_gjonewick
252          gjonewick_to_overbeek          gjonewick_to_overbeek
253    
254            newick_is_valid
255          newick_is_rooted          newick_is_rooted
256          newick_is_unrooted          newick_is_unrooted
257          tree_rooted_on_tip          tree_rooted_on_tip
# Line 266  Line 291 
291          newick_fix_negative_branches          newick_fix_negative_branches
292          newick_rescale_branches          newick_rescale_branches
293    
294            newick_strip_comments
295    
296          normalize_newick_tree          normalize_newick_tree
297          reverse_newick_tree          reverse_newick_tree
298          std_unrooted_newick          std_unrooted_newick
# Line 287  Line 314 
314          newick_subtree          newick_subtree
315          collapse_zero_length_branches          collapse_zero_length_branches
316    
317            newick_insert_at_node
318            newick_insert_between_nodes
319    
320          writeNewickTree          writeNewickTree
321          fwriteNewickTree          fwriteNewickTree
322          strNewickTree          strNewickTree
# Line 399  Line 429 
429  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
430    
431  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]
432  sub newick_lbl      { $_[0]->[1] }  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }
433  sub newick_x        { $_[0]->[2] }  sub newick_x        { $_[0]->[2] }
434  sub newick_c1       { $_[0]->[3] }  sub newick_c1       { $_[0]->[3] }
435  sub newick_c2       { $_[0]->[4] }  sub newick_c2       { $_[0]->[4] }
# Line 474  Line 504 
504  #===============================================================================  #===============================================================================
505  #  Some tree property tests:  #  Some tree property tests:
506  #===============================================================================  #===============================================================================
507    #  Tree is valid?
508    #
509    #  $bool = newick_is_valid( $node, $verbose )
510    #-------------------------------------------------------------------------------
511    sub newick_is_valid
512    {
513        my $node = shift;
514    
515        if ( ! array_ref( $node ) )
516        {
517            print STDERR "Node is not array reference\n" if $_[0];
518            return 0;
519        }
520    
521        my @node = @$node;
522        if ( ! @node )
523        {
524            print STDERR "Node is empty array reference\n" if $_[0];
525            return 0;
526        }
527    
528        # Must have descendant or label:
529    
530        if ( ! ( array_ref( $node[0] ) && @{ $node[0] } ) && ! $node[2] )
531        {
532            print STDERR "Node has neither descendant nor label\n" if $_[0];
533            return 0;
534        }
535    
536        #  If comments are present, they must be array references
537    
538        foreach ( ( @node > 3 ) ? @node[ 3 .. $#node ] : () )
539        {
540            if ( defined( $_ ) && ! array_ref( $_ ) )
541            {
542                print STDERR "Node has neither descendant or label\n" if $_[0];
543                return 0;
544            }
545        }
546    
547        #  Inspect the descendants:
548    
549        foreach ( array_ref( $node[0] ) ? @{ $node[0] } : () )
550        {
551            newick_is_valid( $_, @_ ) || return 0
552        }
553    
554        return 1;
555    }
556    
557    
558    #-------------------------------------------------------------------------------
559  #  Tree is rooted (2 branches at root node)?  #  Tree is rooted (2 branches at root node)?
560  #  #
561  #  $bool = newick_is_rooted( $node )  #  $bool = newick_is_rooted( $node )
# Line 759  Line 841 
841    
842    
843  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
844    #  Tree tip insertion point (with standard node labels):
845    #
846    #  [ $node1, $x1, $node2, $x2, $x ]
847    #           = newick_tip_insertion_point( $tree, $tip )
848    #
849    #  Which means: tip is on a branch of length x that is inserted into the branch
850    #  connecting node1 and node2, at distance x1 from node1 and x2 from node2.
851    #
852    #                x1    +------ n1a (lowest sorting tip of this subtree)
853    #            +--------n1
854    #            |         +------n1b (lowest sorting tip of this subtree)
855    #  tip-------n
856    #        x   |       +------------- n2a (lowest sorting tip of this subtree)
857    #            +------n2
858    #               x2   +-------- n2b (lowest sorting tip of this subtree)
859    #
860    #  The designations of 1 vs 2, and a vs b are chosen such that:
861    #     n1a < n1b, and n2a < n2b, and n1a < n2a
862    #
863    #  Then the statandard description becomes:
864    #
865    #  [ [ $n1a, min(n1b,n2a), max(n1b,n2a) ], x1,
866    #    [ $n2a, min(n2b,n1a), max(n2b,n1a) ], x2,
867    #    x
868    #  ]
869    #
870    #-------------------------------------------------------------------------------
871    sub newick_tip_insertion_point
872    {
873        my ( $tree, $tip ) = @_;
874        $tree && $tip && ref( $tree ) eq 'ARRAY'    or return undef;
875        $tree = copy_newick_tree( $tree )           or return undef;
876        $tree = reroot_newick_to_tip( $tree, $tip ) or return undef;
877        my $node = $tree;
878    
879        my $x  = 0;                        # Distance to node
880        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
881        $node  = $dl->[0];                 # Node adjacent to tip
882        $dl    = newick_desc_ref( $node );
883        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
884        {
885            $node = $dl->[0];
886            $x   += newick_x( $node );
887            $dl   = newick_desc_ref( $node );
888        }
889        $x += newick_x( $node );
890    
891        #  We are now at the node that is the insertion point.
892        #  Is it a tip?
893    
894        my @description;
895    
896        if ( ( ! $dl ) || @$dl == 0 )
897        {
898            @description = ( [ newick_lbl( $node ) ], 0, undef, 0, $x );
899        }
900    
901        #  Is it a trifurcation or greater, in which case it does not go
902        #  away with tip deletion?
903    
904        elsif ( @$dl > 2 )
905        {
906            @description = ( [ std_node_name( $node, $node ) ], 0, undef, 0, $x );
907        }
908    
909        #  The node is bifurcating.  We need to describe it.
910    
911        else
912        {
913            my ( $n1, $x1 ) = describe_desc( $dl->[0] );
914            my ( $n2, $x2 ) = describe_desc( $dl->[1] );
915    
916            if ( @$n1 == 2 ) { push @$n1, $n2->[0] }
917            if ( @$n2 == 2 )
918            {
919                @$n2 = sort { lc $a cmp lc $b } ( @$n2, $n1->[0] );
920            }
921            if ( @$n1 == 3 ) { @$n2 = sort { lc $a cmp lc $b } @$n2 }
922            @description = ( $n1, $x1, $n2, $x2, $x );
923        }
924    
925        return wantarray ? @description : \@description;
926    }
927    
928    
929    sub describe_desc
930    {
931        my $node = shift;
932    
933        my $x  = 0;                        # Distance to node
934        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
935        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
936        {
937            $node = $dl->[0];
938            $x   += newick_x( $node );
939            $dl   = newick_desc_ref( $node );
940        }
941        $x += newick_x( $node );
942    
943        #  Is it a tip?  Return list of one tip;
944    
945        if ( ( ! $dl ) || @$dl == 0 )
946        {
947            return ( [ newick_lbl( $node ) ], $x );
948        }
949    
950        #  Get tips of each descendent, keeping lowest sorting from each.
951        #  Return the two lowest of those (the third will come from the
952        #  other side of the original node).
953    
954        else
955        {
956            my @rep_tips = sort { lc $a cmp lc $b }
957                           map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }
958                           @$dl;
959            return ( [ @rep_tips[0,1] ], $x );
960        }
961    }
962    
963    
964    #-------------------------------------------------------------------------------
965  #  Standard node name:  #  Standard node name:
966  #     Tip label if at a tip  #     Tip label if at a tip
967  #     Three sorted tip labels intersecting at node, each being smallest  #     Three sorted tip labels intersecting at node, each being smallest
# Line 781  Line 984 
984      #  Work through lists of tips in descendant subtrees, removing them from      #  Work through lists of tips in descendant subtrees, removing them from
985      #  @rest, and keeping the best tip for each subtree.      #  @rest, and keeping the best tip for each subtree.
986    
987      my @rest = tips_in_newick( $tree );      my @rest = newick_tip_list( $tree );
988      my @best = map {      my @best = map {
989              my @tips = sort { lc $a cmp lc $b } tips_in_newick( $_ );              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );
990              @rest = gjolists::set_difference( \@rest, \@tips );              @rest = gjolists::set_difference( \@rest, \@tips );
991              $tips[0];              $tips[0];
992          } newick_desc_list( $noderef );          } newick_desc_list( $noderef );
# Line 977  Line 1180 
1180    
1181      array_ref( $node ) && defined( $node1 )      array_ref( $node ) && defined( $node1 )
1182                         && defined( $node2 ) || return undef;                         && defined( $node2 ) || return undef;
1183      my @p1 = path_to_node( $node, $node1 );      my @p1 = path_to_node( $node, $node1 ) or return undef;
1184      my @p2 = path_to_node( $node, $node2 );      my @p2 = path_to_node( $node, $node2 ) or return undef;
     @p1 && @p2 || return undef;                          # Were they found?  
1185    
1186      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1187      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost
# Line 1258  Line 1460 
1460    
1461    
1462  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1463    #  Remove comments from a newick tree (e.g., before writing for phylip).
1464    #
1465    #  $node = newick_strip_comments( $node )
1466    #-------------------------------------------------------------------------------
1467    sub newick_strip_comments {
1468        my ( $node ) = @_;
1469    
1470        @$node = @$node[ 0 .. 2 ];
1471        foreach ( newick_desc_list( $node ) ) { newick_strip_comments( $_ ) }
1472        $node;
1473    }
1474    
1475    
1476    #-------------------------------------------------------------------------------
1477  #  Normalize tree order (in place).  #  Normalize tree order (in place).
1478  #  #
1479  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )
# Line 1501  Line 1717 
1717      #      #
1718      #      splice( @$dl1, $path1-1, 1 );      #      splice( @$dl1, $path1-1, 1 );
1719      #      #
1720      #  But this maintains the cyclic order of the nodes:      #  But the following maintains the cyclic order of the nodes:
1721    
1722      my $dl1 = newick_desc_ref( $node1 );      my $dl1 = newick_desc_ref( $node1 );
1723      my $nd1 = @$dl1;      my $nd1 = @$dl1;
# Line 1516  Line 1732 
1732    
1733      my $dl2 = newick_desc_ref( $node2 );      my $dl2 = newick_desc_ref( $node2 );
1734      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }
1735      else                     { set_newick_desc_list( $node2, [ $node1 ] ) }      else                     { set_newick_desc_list( $node2, $node1 ) }
1736    
1737      #  Move c1 comments from node 1 to node 2:      #  Move c1 comments from node 1 to node 2:
1738    
# Line 1928  Line 2144 
2144      ( $tree, ( $collapse ? @new_desc : () ) );      ( $tree, ( $collapse ? @new_desc : () ) );
2145  }  }
2146    
2147    #-------------------------------------------------------------------------------
2148    #  Add a subtree to a newick tree node:
2149    #
2150    #  $node = newick_insert_at_node( $node, $subtree )
2151    #-------------------------------------------------------------------------------
2152    sub newick_insert_at_node
2153    {
2154        my ( $node, $subtree ) = @_;
2155        array_ref( $node ) && array_ref( $subtree ) or return undef;
2156    
2157        #  We could check validity of trees, but ....
2158    
2159        my $dl = newick_desc_ref( $node );
2160        if ( array_ref( $dl ) )
2161        {
2162            push @$dl, $subtree;
2163        }
2164        else
2165        {
2166            set_newick_desc_ref( $node, [ $subtree ] );
2167        }
2168        return $node;
2169    }
2170    
2171    
2172    #-------------------------------------------------------------------------------
2173    #  Insert a subtree into a newick tree along the path between 2 nodes:
2174    #
2175    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
2176    #-------------------------------------------------------------------------------
2177    sub newick_insert_between_nodes
2178    {
2179        my ( $tree, $subtree, $node1, $node2, $fraction ) = @_;
2180        array_ref( $tree ) && array_ref( $subtree ) or return undef;
2181        $fraction >= 0 && $fraction <= 1 or return undef;
2182    
2183        #  Find the paths to the nodes:
2184    
2185        my @path1 = path_to_node( $tree, $node1 ) or return undef;
2186        my @path2 = path_to_node( $tree, $node2 ) or return undef;
2187    
2188        #  Trim the common prefix:
2189    
2190        while ( $path1[1] == $path2[1] )
2191        {
2192            splice( @path1, 0, 2 );
2193            splice( @path2, 0, 2 );
2194        }
2195    
2196        my ( @path, $dist );
2197        if    ( @path1 < 3 )
2198        {
2199            @path2 >= 3 or return undef;              # node1 = node2
2200            $dist = $fraction* newick_path_length( @path2 );
2201            @path = @path2;
2202        }
2203        elsif ( @path2 < 3 )
2204        {
2205            $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
2206            @path = @path1;
2207        }
2208        else
2209        {
2210            my $dist1 = newick_path_length( @path1 );
2211            my $dist2 = newick_path_length( @path2 );
2212            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2213            @path = ( $dist <= 0 ) ? @path1 : @path2;
2214            $dist = abs( $dist );
2215        }
2216    
2217        #  Descend tree until we reach the insertion branch:
2218    
2219        my $x;
2220        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
2221        {
2222            $dist -= $x;
2223            splice( @path, 0, 2 );
2224        }
2225    
2226        #  Insert the new node:
2227    
2228        set_newick_desc_i( $path[0], $path[1], [ [ $path[2], $subtree ], undef, $dist ] );
2229        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
2230    
2231        return $tree;
2232    }
2233    
2234    
2235  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2236  #  Prune one or more tips from a tree:  #  Prune one or more tips from a tree:

Legend:
Removed from v.1.7  
changed lines
  Added in v.1.8

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3