[Bio] / FigKernelPackages / gjonewicklib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjonewicklib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.3, Tue Jan 3 19:50:32 2006 UTC revision 1.8, Sun Feb 11 18:25:48 2007 UTC
# Line 1  Line 1 
1  #  #
2  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2007 University of Chicago and Fellowship
3  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
4  #  #
5  # This file is part of the SEED Toolkit.  # This file is part of the SEED Toolkit.
# Line 44  Line 44 
44  #  #
45  #     $tree = \@rootnode;  #     $tree = \@rootnode;
46  #  #
47  #     @node = ( \@desc,  #  reference to list of descendants  #     $node = [ \@desc,  #  reference to list of descendants
48  #                $label, #  node label  #                $label, #  node label
49  #                $x,     #  branch length  #                $x,     #  branch length
50  #               \@c1,    #  reference to comment list 1  #               \@c1,    #  reference to comment list 1
# Line 52  Line 52 
52  #               \@c3,    #  reference to comment list 3  #               \@c3,    #  reference to comment list 3
53  #               \@c4,    #  reference to comment list 4  #               \@c4,    #  reference to comment list 4
54  #               \@c5     #  reference to comment list 5  #               \@c5     #  reference to comment list 5
55  #             )  #             ]
56  #  #
57  #  At present, no routine tests or enforces the length of the list (a single  #  At present, no routine tests or enforces the length of the list (a single
58  #  element list could be a valid internal node).  #  element list could be a valid internal node).
# Line 63  Line 63 
63  #  time, but is different from the prolog representation.  #  time, but is different from the prolog representation.
64  #  #
65  #  #
66    #  Ross Overbeek has a different tree node structure:
67    #
68    #     $node = [ Label,
69    #               DistanceToParent,
70    #               [ ParentPointer, ChildPointer1, ... ],
71    #               [ Name1\tVal1, Name2\tVal2, ... ]
72    #             ]
73    #
74    #  So:
75    #
76    #===============================================================================
77    #  Tree format interconversion:
78    #===============================================================================
79    #
80    #  $gjonewick = overbeek_to_gjonewick( $overbeek )
81    #  $overbeek  = gjonewick_to_overbeek( $gjonewick )
82    #
83  #===============================================================================  #===============================================================================
84  #  Tree data extraction:  #  Tree data extraction:
85  #===============================================================================  #===============================================================================
# Line 92  Line 109 
109  #  set_newick_desc_list( $noderef, @desclist )  #  set_newick_desc_list( $noderef, @desclist )
110  #  set_newick_desc_i( $noderef1, $i, $noderef2)  #  set_newick_desc_i( $noderef1, $i, $noderef2)
111  #  #
112    #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid
113    #
114  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root
115  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root
116  #  $bool    = newick_is_tip_rooted( $noderef )  # 1 branch from root  #  $bool    = newick_is_tip_rooted( $noderef )  # 1 branch from root
# Line 111  Line 130 
130  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )
131  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )
132  #  #
133    #  Tree tip insertion point (tip is on branch of length x that
134    #  is inserted into branch connecting node1 and node2, a distance
135    #  x1 from node1 and x2 from node2):
136    #
137    #  [ $node1, $x1, $node2, $x2, $x ]
138    #           = newick_tip_insertion_point( $tree, $tip )
139    #
140    #  Standardized label for a node in terms of intersection of 3 lowest sorting
141    #  tips (sort is lower case):
142    #
143    #  @TipOrTips = std_node_name( $Tree, $Node )
144    #
145  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
146  #  Paths from root of tree:  #  Paths from root of tree:
147  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 137  Line 168 
168  #  Tree manipulations:  #  Tree manipulations:
169  #===============================================================================  #===============================================================================
170  #  #
171  #  $treecopy = copy__newick_tree( $tree )  #  $treecopy = copy_newick_tree( $tree )
172  #  #
173  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
174  #  The following modify the existing tree, and passibly any components of that  #  The following modify the existing tree, and possibly any components of that
175  #  tree that are reached by reference.  If the old version is still needed, copy  #  tree that are reached by reference.  If the old version is still needed, copy
176  #  before modifying.  #  before modifying.
177  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 156  Line 187 
187  #  #
188  #  $n_changed = newick_set_undefined_branches( $node, $x )  #  $n_changed = newick_set_undefined_branches( $node, $x )
189  #  $n_changed = newick_set_all_branches( $node, $x )  #  $n_changed = newick_set_all_branches( $node, $x )
190    #  $n_changed = newick_fix_negative_branches( $tree )
191    #  $node      = newick_rescale_branches( $node, $factor )
192    #
193    #  Modify comments:
194    #
195    #  $node = newick_strip_comments( $node )
196  #  #
197  #  Modify rooting and/or order:  #  Modify rooting and/or order:
198  #  #
# Line 170  Line 207 
207  #  $newtree = reroot_newick_to_node( $tree, @node )  #  $newtree = reroot_newick_to_node( $tree, @node )
208  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
209  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
210    #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted
211    #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips
212  #  $newtree = uproot_tip_rooted_newick( $tree )  #  $newtree = uproot_tip_rooted_newick( $tree )
213  #  $newtree = uproot_newick( $tree )  #  $newtree = uproot_newick( $tree )
214  #  #
# Line 177  Line 216 
216  #  $newtree = newick_subtree( $tree,  @tips )  #  $newtree = newick_subtree( $tree,  @tips )
217  #  $newtree = newick_subtree( $tree, \@tips )  #  $newtree = newick_subtree( $tree, \@tips )
218  #  #
219    #  $newtree = collapse_zero_length_branches( $tree )
220    #
221    #  $node = newick_insert_at_node( $node, $subtree )
222    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
223    #
224  #===============================================================================  #===============================================================================
225  #  Tree reading and writing:  #  Tree reading and writing:
226  #===============================================================================  #===============================================================================
227  #  #
228  #   writeNewickTree( $tree )  #   writeNewickTree( $tree )
229  #   writeNewickTree( $tree, $file )  #   writeNewickTree( $tree, $file )
230  #   writePrettyTree( $tree, $file )  #   writeNewickTree( $tree, \*FH )
231  #  fwriteNewickTree( $file, $tree )  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O
232  #  $treestring = swriteNewickTree( $tree )  #  $treestring = swriteNewickTree( $tree )
233  #  $treestring = formatNewickTree( $tree )  #  $treestring = formatNewickTree( $tree )
234  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )
235  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )
236  #  #
237    #  $tree  = read_newick_tree( $file )  # reads to a semicolon
238    #  @trees = read_newick_trees( $file ) # reads to end of file
239  #  $tree = parse_newick_tree_str( $string )  #  $tree = parse_newick_tree_str( $string )
240  #  #
241  #===============================================================================  #===============================================================================
242    
243    
244    use Carp;
245    use Data::Dumper;
246    
247  require Exporter;  require Exporter;
248    
249  our @ISA = qw(Exporter);  our @ISA = qw(Exporter);
250  our @EXPORT = qw(  our @EXPORT = qw(
251            overbeek_to_gjonewick
252            gjonewick_to_overbeek
253    
254            newick_is_valid
255          newick_is_rooted          newick_is_rooted
256          newick_is_unrooted          newick_is_unrooted
257          tree_rooted_on_tip          tree_rooted_on_tip
# Line 226  Line 279 
279          tip_to_tip_distance          tip_to_tip_distance
280          node_to_node_distance          node_to_node_distance
281    
282          copy__newick_tree          copy_newick_tree
283    
284          newick_relabel_nodes          newick_relabel_nodes
285          newick_relabel_nodes_i          newick_relabel_nodes_i
# Line 235  Line 288 
288    
289          newick_set_undefined_branches          newick_set_undefined_branches
290          newick_set_all_branches          newick_set_all_branches
291            newick_fix_negative_branches
292            newick_rescale_branches
293    
294            newick_strip_comments
295    
296          normalize_newick_tree          normalize_newick_tree
297          reverse_newick_tree          reverse_newick_tree
# Line 249  Line 306 
306          reroot_newick_to_node          reroot_newick_to_node
307          reroot_newick_to_node_ref          reroot_newick_to_node_ref
308          reroot_newick_to_approx_midpoint          reroot_newick_to_approx_midpoint
309            reroot_newick_to_approx_midpoint_w
310          uproot_tip_rooted_newick          uproot_tip_rooted_newick
311          uproot_newick          uproot_newick
312    
313          prune_from_newick          prune_from_newick
314          newick_subtree          newick_subtree
315            collapse_zero_length_branches
316    
317            newick_insert_at_node
318            newick_insert_between_nodes
319    
320          writeNewickTree          writeNewickTree
321          fwriteNewickTree          fwriteNewickTree
322          strNewickTree          strNewickTree
323          formatNewickTree          formatNewickTree
324    
325            read_newick_tree
326            read_newick_trees
327          parse_newick_tree_str          parse_newick_tree_str
328    
329          printer_plot_newick          printer_plot_newick
# Line 299  Line 364 
364    
365  use gjolists qw(  use gjolists qw(
366          common_prefix          common_prefix
         common_and_unique  
367          unique_suffixes          unique_suffixes
368    
         unique_set  
369          duplicates          duplicates
370          random_order          random_order
371    
         union  
372          intersection          intersection
373          set_difference          set_difference
374          );          );
# Line 324  Line 386 
386    
387    
388  #===============================================================================  #===============================================================================
389    #  Interconvert Overbeek and gjonewick trees:
390    #===============================================================================
391    
392    sub overbeek_to_gjonewick
393    {
394        return () unless ref( $_[0] ) eq 'ARRAY';
395        my ( $lbl, $x, $desc ) = @{ $_[0] };
396        my ( undef, @desc ) = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : ();
397        [ [ map { overbeek_to_gjonewick( $_ ) } @desc ], $lbl, $x ]
398    }
399    
400    sub gjonewick_to_overbeek
401    {
402        return () unless ref( $_[0] ) eq 'ARRAY';
403        my ( $desc, $lbl, $x ) = @{ $_[0] };
404        my @desc = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : ();
405        my $parent = $_[1];
406        my $node = [ $lbl, $x, undef, [] ];
407        $node->[2] = [ $parent, map { gjonewick_to_overbeek( $_, $node ) } @desc ];
408        return $node;
409    }
410    
411    #===============================================================================
412  #  Extract tree structure values:  #  Extract tree structure values:
413  #===============================================================================  #===============================================================================
414  #  #
# Line 344  Line 429 
429  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
430    
431  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]
432  sub newick_lbl      { $_[0]->[1] }  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }
433  sub newick_x        { $_[0]->[2] }  sub newick_x        { $_[0]->[2] }
434  sub newick_c1       { $_[0]->[3] }  sub newick_c1       { $_[0]->[3] }
435  sub newick_c2       { $_[0]->[4] }  sub newick_c2       { $_[0]->[4] }
# Line 419  Line 504 
504  #===============================================================================  #===============================================================================
505  #  Some tree property tests:  #  Some tree property tests:
506  #===============================================================================  #===============================================================================
507    #  Tree is valid?
508    #
509    #  $bool = newick_is_valid( $node, $verbose )
510    #-------------------------------------------------------------------------------
511    sub newick_is_valid
512    {
513        my $node = shift;
514    
515        if ( ! array_ref( $node ) )
516        {
517            print STDERR "Node is not array reference\n" if $_[0];
518            return 0;
519        }
520    
521        my @node = @$node;
522        if ( ! @node )
523        {
524            print STDERR "Node is empty array reference\n" if $_[0];
525            return 0;
526        }
527    
528        # Must have descendant or label:
529    
530        if ( ! ( array_ref( $node[0] ) && @{ $node[0] } ) && ! $node[2] )
531        {
532            print STDERR "Node has neither descendant nor label\n" if $_[0];
533            return 0;
534        }
535    
536        #  If comments are present, they must be array references
537    
538        foreach ( ( @node > 3 ) ? @node[ 3 .. $#node ] : () )
539        {
540            if ( defined( $_ ) && ! array_ref( $_ ) )
541            {
542                print STDERR "Node has neither descendant or label\n" if $_[0];
543                return 0;
544            }
545        }
546    
547        #  Inspect the descendants:
548    
549        foreach ( array_ref( $node[0] ) ? @{ $node[0] } : () )
550        {
551            newick_is_valid( $_, @_ ) || return 0
552        }
553    
554        return 1;
555    }
556    
557    
558    #-------------------------------------------------------------------------------
559  #  Tree is rooted (2 branches at root node)?  #  Tree is rooted (2 branches at root node)?
560  #  #
561  #  $bool = newick_is_rooted( $node )  #  $bool = newick_is_rooted( $node )
# Line 572  Line 709 
709  #  @tips = newick_duplicated_tips( $node )  #  @tips = newick_duplicated_tips( $node )
710  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
711  sub newick_duplicated_tips {  sub newick_duplicated_tips {
712      duplicates( newick_tip_list( $_[0] ) );      gjolists::duplicates( newick_tip_list( $_[0] ) );
713  }  }
714    
715    
# Line 608  Line 745 
745      my ( $Tree1, $Tree2 ) = @_;      my ( $Tree1, $Tree2 ) = @_;
746      my ( @Tips1 ) = newick_tip_list( $Tree1 );      my ( @Tips1 ) = newick_tip_list( $Tree1 );
747      my ( @Tips2 ) = newick_tip_list( $Tree2 );      my ( @Tips2 ) = newick_tip_list( $Tree2 );
748      intersection( \@Tips1, \@Tips2 );      gjolists::intersection( \@Tips1, \@Tips2 );
749  }  }
750    
751    
# Line 704  Line 841 
841    
842    
843  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
844    #  Tree tip insertion point (with standard node labels):
845    #
846    #  [ $node1, $x1, $node2, $x2, $x ]
847    #           = newick_tip_insertion_point( $tree, $tip )
848    #
849    #  Which means: tip is on a branch of length x that is inserted into the branch
850    #  connecting node1 and node2, at distance x1 from node1 and x2 from node2.
851    #
852    #                x1    +------ n1a (lowest sorting tip of this subtree)
853    #            +--------n1
854    #            |         +------n1b (lowest sorting tip of this subtree)
855    #  tip-------n
856    #        x   |       +------------- n2a (lowest sorting tip of this subtree)
857    #            +------n2
858    #               x2   +-------- n2b (lowest sorting tip of this subtree)
859    #
860    #  The designations of 1 vs 2, and a vs b are chosen such that:
861    #     n1a < n1b, and n2a < n2b, and n1a < n2a
862    #
863    #  Then the statandard description becomes:
864    #
865    #  [ [ $n1a, min(n1b,n2a), max(n1b,n2a) ], x1,
866    #    [ $n2a, min(n2b,n1a), max(n2b,n1a) ], x2,
867    #    x
868    #  ]
869    #
870    #-------------------------------------------------------------------------------
871    sub newick_tip_insertion_point
872    {
873        my ( $tree, $tip ) = @_;
874        $tree && $tip && ref( $tree ) eq 'ARRAY'    or return undef;
875        $tree = copy_newick_tree( $tree )           or return undef;
876        $tree = reroot_newick_to_tip( $tree, $tip ) or return undef;
877        my $node = $tree;
878    
879        my $x  = 0;                        # Distance to node
880        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
881        $node  = $dl->[0];                 # Node adjacent to tip
882        $dl    = newick_desc_ref( $node );
883        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
884        {
885            $node = $dl->[0];
886            $x   += newick_x( $node );
887            $dl   = newick_desc_ref( $node );
888        }
889        $x += newick_x( $node );
890    
891        #  We are now at the node that is the insertion point.
892        #  Is it a tip?
893    
894        my @description;
895    
896        if ( ( ! $dl ) || @$dl == 0 )
897        {
898            @description = ( [ newick_lbl( $node ) ], 0, undef, 0, $x );
899        }
900    
901        #  Is it a trifurcation or greater, in which case it does not go
902        #  away with tip deletion?
903    
904        elsif ( @$dl > 2 )
905        {
906            @description = ( [ std_node_name( $node, $node ) ], 0, undef, 0, $x );
907        }
908    
909        #  The node is bifurcating.  We need to describe it.
910    
911        else
912        {
913            my ( $n1, $x1 ) = describe_desc( $dl->[0] );
914            my ( $n2, $x2 ) = describe_desc( $dl->[1] );
915    
916            if ( @$n1 == 2 ) { push @$n1, $n2->[0] }
917            if ( @$n2 == 2 )
918            {
919                @$n2 = sort { lc $a cmp lc $b } ( @$n2, $n1->[0] );
920            }
921            if ( @$n1 == 3 ) { @$n2 = sort { lc $a cmp lc $b } @$n2 }
922            @description = ( $n1, $x1, $n2, $x2, $x );
923        }
924    
925        return wantarray ? @description : \@description;
926    }
927    
928    
929    sub describe_desc
930    {
931        my $node = shift;
932    
933        my $x  = 0;                        # Distance to node
934        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
935        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
936        {
937            $node = $dl->[0];
938            $x   += newick_x( $node );
939            $dl   = newick_desc_ref( $node );
940        }
941        $x += newick_x( $node );
942    
943        #  Is it a tip?  Return list of one tip;
944    
945        if ( ( ! $dl ) || @$dl == 0 )
946        {
947            return ( [ newick_lbl( $node ) ], $x );
948        }
949    
950        #  Get tips of each descendent, keeping lowest sorting from each.
951        #  Return the two lowest of those (the third will come from the
952        #  other side of the original node).
953    
954        else
955        {
956            my @rep_tips = sort { lc $a cmp lc $b }
957                           map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }
958                           @$dl;
959            return ( [ @rep_tips[0,1] ], $x );
960        }
961    }
962    
963    
964    #-------------------------------------------------------------------------------
965  #  Standard node name:  #  Standard node name:
966  #     Tip label if at a tip  #     Tip label if at a tip
967  #     Three sorted tip labels intersecting at node, each being smallest  #     Three sorted tip labels intersecting at node, each being smallest
# Line 726  Line 984 
984      #  Work through lists of tips in descendant subtrees, removing them from      #  Work through lists of tips in descendant subtrees, removing them from
985      #  @rest, and keeping the best tip for each subtree.      #  @rest, and keeping the best tip for each subtree.
986    
987      my @rest = tips_in_newick( $tree );      my @rest = newick_tip_list( $tree );
988      my @best = map {      my @best = map {
989              my @tips = sort { lc $a cmp lc $b } tips_in_newick( $_ );              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );
990              @rest = set_difference( \@rest, \@tips );              @rest = gjolists::set_difference( \@rest, \@tips );
991              $tips[0];              $tips[0];
992          } newick_desc_list( $noderef );          } newick_desc_list( $noderef );
993    
# Line 848  Line 1106 
1106      @p2 && @p3 || return ();                             #  Were they found?      @p2 && @p3 || return ();                             #  Were they found?
1107    
1108      # Find the common prefix for each pair of paths      # Find the common prefix for each pair of paths
1109      my @p12 = common_prefix( \@p1, \@p2 );      my @p12 = gjolists::common_prefix( \@p1, \@p2 );
1110      my @p13 = common_prefix( \@p1, \@p3 );      my @p13 = gjolists::common_prefix( \@p1, \@p3 );
1111      my @p23 = common_prefix( \@p2, \@p3 );      my @p23 = gjolists::common_prefix( \@p2, \@p3 );
1112    
1113      # Return the longest common prefix of any two paths      # Return the longest common prefix of any two paths
1114      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :
# Line 901  Line 1159 
1159      @p1 && @p2 || return undef;                          # Were they found?      @p1 && @p2 || return undef;                          # Were they found?
1160    
1161      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1162      my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost
1163      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1164      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1165    
# Line 922  Line 1180 
1180    
1181      array_ref( $node ) && defined( $node1 )      array_ref( $node ) && defined( $node1 )
1182                         && defined( $node2 ) || return undef;                         && defined( $node2 ) || return undef;
1183      my @p1 = path_to_node( $node, $node1 );      my @p1 = path_to_node( $node, $node1 ) or return undef;
1184      my @p2 = path_to_node( $node, $node2 );      my @p2 = path_to_node( $node, $node2 ) or return undef;
     @p1 && @p2 || return undef;                          # Were they found?  
1185    
1186      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1187      my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost
1188      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1189      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1190    
# Line 942  Line 1199 
1199  #  Lists are copied, except that references to empty lists go to undef.  #  Lists are copied, except that references to empty lists go to undef.
1200  #  Only defined fields are added, so tree list may be shorter than 8 fields.  #  Only defined fields are added, so tree list may be shorter than 8 fields.
1201  #  #
1202  #  $treecopy = copy__newick_tree( $tree )  #  $treecopy = copy_newick_tree( $tree )
1203  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1204  sub copy__newick_tree {  sub copy_newick_tree {
1205      my ( $node ) = @_;      my ( $node ) = @_;
1206      array_ref( $node ) || return undef;      array_ref( $node ) || return undef;
1207    
1208      my $nn = [];  #  Reference to a new node structure      my $nn = [];  #  Reference to a new node structure
1209      #  Build a new descendant list, if not empty      #  Build a new descendant list, if not empty
1210      my @dl = newick_desc_list( $node );      my @dl = newick_desc_list( $node );
1211      set_newick_desc_ref( $nn, @dl ? [ map { copy__newick_tree( $_ ) } @dl ]      set_newick_desc_ref( $nn, @dl ? [ map { copy_newick_tree( $_ ) } @dl ]
1212                                    : undef                                    : undef
1213                         );                         );
1214    
# Line 1142  Line 1399 
1399      my ( $node, $x, $not_root ) = @_;      my ( $node, $x, $not_root ) = @_;
1400    
1401      my $n = 0;      my $n = 0;
1402      if ( $not_root ) {      if ( $not_root )
1403        {
1404          set_newick_x( $node, $x );          set_newick_x( $node, $x );
1405          $n++;          $n++;
1406      }      }
1407    
1408      foreach ( newick_desc_list( $node ) ) {      foreach ( newick_desc_list( $node ) )
1409        {
1410          $n += newick_set_all_branches( $_, $x, 1 );          $n += newick_set_all_branches( $_, $x, 1 );
1411      }      }
1412    
# Line 1156  Line 1415 
1415    
1416    
1417  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1418    #  Rescale all branch lenghts by factor.
1419    #
1420    #  $node = newick_rescale_branches( $node, $factor )
1421    #-------------------------------------------------------------------------------
1422    sub newick_rescale_branches {
1423        my ( $node, $factor ) = @_;
1424    
1425        my $x = newick_x( $node );
1426        set_newick_x( $node, $factor * $x ) if $x;
1427    
1428        foreach ( newick_desc_list( $node ) )
1429        {
1430            newick_rescale_branches( $_, $factor );
1431        }
1432    
1433        $node;
1434    }
1435    
1436    
1437    #-------------------------------------------------------------------------------
1438    #  Set negative branches to zero.  The original tree is modfied.
1439    #
1440    #  $n_changed = newick_fix_negative_branches( $tree )
1441    #-------------------------------------------------------------------------------
1442    sub newick_fix_negative_branches {
1443        my ( $tree ) = @_;
1444        array_ref( $tree ) or return undef;
1445        my $n_changed = 0;
1446        my $x = newick_x( $tree );
1447        if ( defined( $x ) and $x < 0 )
1448        {
1449            set_newick_x( $tree, 0 );
1450            $n_changed++;
1451        }
1452    
1453        foreach ( newick_desc_list( $tree ) )
1454        {
1455            $n_changed += newick_fix_negative_branches( $_ );
1456        }
1457    
1458        $n_changed;
1459    }
1460    
1461    
1462    #-------------------------------------------------------------------------------
1463    #  Remove comments from a newick tree (e.g., before writing for phylip).
1464    #
1465    #  $node = newick_strip_comments( $node )
1466    #-------------------------------------------------------------------------------
1467    sub newick_strip_comments {
1468        my ( $node ) = @_;
1469    
1470        @$node = @$node[ 0 .. 2 ];
1471        foreach ( newick_desc_list( $node ) ) { newick_strip_comments( $_ ) }
1472        $node;
1473    }
1474    
1475    
1476    #-------------------------------------------------------------------------------
1477  #  Normalize tree order (in place).  #  Normalize tree order (in place).
1478  #  #
1479  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )
# Line 1371  Line 1689 
1689      #  Reorder this subtree:      #  Reorder this subtree:
1690    
1691      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1692      @$dl_ref = random_order( @$dl_ref );      @$dl_ref = gjolists::random_order( @$dl_ref );
1693    
1694      #  Reorder descendants:      #  Reorder descendants:
1695    
# Line 1399  Line 1717 
1717      #      #
1718      #      splice( @$dl1, $path1-1, 1 );      #      splice( @$dl1, $path1-1, 1 );
1719      #      #
1720      #  But this maintains the cyclic order of the nodes:      #  But the following maintains the cyclic order of the nodes:
1721    
1722      my $dl1 = newick_desc_ref( $node1 );      my $dl1 = newick_desc_ref( $node1 );
1723      my $nd1 = @$dl1;      my $nd1 = @$dl1;
# Line 1414  Line 1732 
1732    
1733      my $dl2 = newick_desc_ref( $node2 );      my $dl2 = newick_desc_ref( $node2 );
1734      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }
1735      else                     { set_newick_desc_list( $node2, [ $node1 ] ) }      else                     { set_newick_desc_list( $node2, $node1 ) }
1736    
1737      #  Move c1 comments from node 1 to node 2:      #  Move c1 comments from node 1 to node 2:
1738    
# Line 1500  Line 1818 
1818  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1819  sub reroot_newick_to_approx_midpoint {  sub reroot_newick_to_approx_midpoint {
1820      my ( $tree ) = @_;      my ( $tree ) = @_;
1821    
1822      #  Compile average tip to node distances assending      #  Compile average tip to node distances assending
1823    
1824      my $dists1 = average_to_tips_1( $tree );      my $dists1 = average_to_tips_1( $tree );
# Line 1563  Line 1882 
1882    
1883      #  The root must be some where below this node:      #  The root must be some where below this node:
1884    
1885      my $n_1      =   @$desc_list + ( $anc_node ? 1 : 0 ) - 1;      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );
1886      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );
1887    
1888      foreach ( @$desc_list )      foreach ( @$desc_list )
# Line 1582  Line 1901 
1901    
1902    
1903  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1904    #  Move root of tree to an approximate midpoint.  Weight by tips.
1905    #
1906    #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )
1907    #-------------------------------------------------------------------------------
1908    sub reroot_newick_to_approx_midpoint_w {
1909        my ( $tree ) = @_;
1910    
1911        #  Compile average tip to node distances assending
1912    
1913        my $dists1 = average_to_tips_1_w( $tree );
1914    
1915        #  Compile average tip to node distances descending, returning midpoint node
1916    
1917        my $node = average_to_tips_2_w( $dists1, undef, undef, undef );
1918    
1919        #  Reroot
1920    
1921        $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree
1922    }
1923    
1924    
1925    sub average_to_tips_1_w {
1926        my ( $node ) = @_;
1927    
1928        my @desc_dists = map { average_to_tips_1_w( $_ ) } newick_desc_list( $node );
1929        my $x_below = 0;
1930        my $n_below = 1;
1931        if ( @desc_dists )
1932        {
1933            $n_below = 0;
1934            my $n;
1935            foreach ( @desc_dists )
1936            {
1937                $n_below += $n = $_->[1];
1938                $x_below += $n * $_->[0];
1939            }
1940            $x_below /= $n_below;
1941        }
1942        my $x = newick_x( $node ) || 0;
1943        my $x_net = $x_below + $x;
1944    
1945        [ $x_net, $n_below, $x, $x_below, [ @desc_dists ], $node ]
1946    }
1947    
1948    
1949    sub average_to_tips_2_w {
1950        my ( $dists1, $x_above, $n_above, $anc_node ) = @_;
1951        my ( undef, $n_below, $x, $x_below, $desc_list, $node ) = @$dists1;
1952    
1953        #  Are we done?  Root is in this node's branch, or "above"?
1954    
1955        # defined( $x_above ) and print STDERR "x_above = $x_above\n";
1956        # print STDERR "x       = $x\n";
1957        # print STDERR "x_below = $x_below\n";
1958        # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";
1959    
1960        if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
1961        {
1962            #  At this point the root can only be in this node's branch,
1963            #  or "above" it in the current rooting of the tree (which
1964            #  would mean that the midpoint is actually down a different
1965            #  path from the root of the current tree).
1966            #
1967            #  Is the root in the current branch?
1968    
1969            if ( ( $x_below + $x ) >= $x_above )
1970            {
1971                return ( $x_above >= $x_below ) ? $anc_node : $node;
1972            }
1973            else
1974            {
1975                return undef;
1976            }
1977        }
1978    
1979        #  The root must be some where below this node:
1980    
1981        $n_above ||= 0;
1982        my $n = $n_above + $n_below;
1983        my $ttl_w_dist = ( $n_below * $x_below )
1984                       + ( defined( $x_above ) ? $n_above * ( $x_above + $x ) : 0 );
1985    
1986        foreach ( @$desc_list )
1987        {
1988            my $n_2      = $_->[1];    # n in subtree
1989            my $n_above2 = $n - $n_2;  # tip rooted has 1 above
1990    
1991            #  If input tree is tip_rooted, $n_above2 can be 0, so:
1992    
1993            my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )
1994                                     : 0;
1995            my $root = average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );
1996            if ( $root ) { return $root }
1997        }
1998    
1999        #  Was not anywhere below this node (oh-oh):
2000    
2001        return undef;
2002    }
2003    
2004    
2005    #-------------------------------------------------------------------------------
2006  #  Move root of tree from tip to adjacent node.  #  Move root of tree from tip to adjacent node.
2007  #  #
2008  #  $newtree = uproot_tip_rooted_newick( $tree )  #  $newtree = uproot_tip_rooted_newick( $tree )
# Line 1684  Line 2105 
2105    
2106    
2107  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2108    #  Collapse zero-length branches to make multifurcation.  The original tree
2109    #  is modified.
2110    #
2111    #  $tree = collapse_zero_length_branches( $tree )
2112    #  $tree = collapse_zero_length_branches( $tree, $not_root )
2113    #-------------------------------------------------------------------------------
2114    sub collapse_zero_length_branches {
2115        my ( $tree, $not_root ) = @_;
2116        array_ref( $tree ) || return undef;
2117    
2118        my @desc = newick_desc_list( $tree );
2119        @desc or return ( $tree );              # Cannot collapse terminal branch
2120    
2121        #  Analyze descendants:
2122    
2123        $not_root ||= 0;
2124        my @new_desc = ();
2125        my $changed = 0;
2126        foreach ( @desc )
2127        {
2128            my ( undef, @to_add ) = collapse_zero_length_branches( $_, $not_root+1 );
2129            if ( @to_add )
2130            {
2131                push @new_desc, @to_add;
2132                $changed = 1;
2133            }
2134            else
2135            {
2136                push @new_desc, $_;
2137            }
2138        }
2139        set_newick_desc_ref( $tree, [ @new_desc ] ) if $changed;
2140    
2141        #  Collapse if not root, not tip and zero (or negative) branch:
2142    
2143        my $collapse = $not_root && @new_desc && ( newick_x( $tree ) <= 0 ) ? 1 : 0;
2144        ( $tree, ( $collapse ? @new_desc : () ) );
2145    }
2146    
2147    #-------------------------------------------------------------------------------
2148    #  Add a subtree to a newick tree node:
2149    #
2150    #  $node = newick_insert_at_node( $node, $subtree )
2151    #-------------------------------------------------------------------------------
2152    sub newick_insert_at_node
2153    {
2154        my ( $node, $subtree ) = @_;
2155        array_ref( $node ) && array_ref( $subtree ) or return undef;
2156    
2157        #  We could check validity of trees, but ....
2158    
2159        my $dl = newick_desc_ref( $node );
2160        if ( array_ref( $dl ) )
2161        {
2162            push @$dl, $subtree;
2163        }
2164        else
2165        {
2166            set_newick_desc_ref( $node, [ $subtree ] );
2167        }
2168        return $node;
2169    }
2170    
2171    
2172    #-------------------------------------------------------------------------------
2173    #  Insert a subtree into a newick tree along the path between 2 nodes:
2174    #
2175    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
2176    #-------------------------------------------------------------------------------
2177    sub newick_insert_between_nodes
2178    {
2179        my ( $tree, $subtree, $node1, $node2, $fraction ) = @_;
2180        array_ref( $tree ) && array_ref( $subtree ) or return undef;
2181        $fraction >= 0 && $fraction <= 1 or return undef;
2182    
2183        #  Find the paths to the nodes:
2184    
2185        my @path1 = path_to_node( $tree, $node1 ) or return undef;
2186        my @path2 = path_to_node( $tree, $node2 ) or return undef;
2187    
2188        #  Trim the common prefix:
2189    
2190        while ( $path1[1] == $path2[1] )
2191        {
2192            splice( @path1, 0, 2 );
2193            splice( @path2, 0, 2 );
2194        }
2195    
2196        my ( @path, $dist );
2197        if    ( @path1 < 3 )
2198        {
2199            @path2 >= 3 or return undef;              # node1 = node2
2200            $dist = $fraction* newick_path_length( @path2 );
2201            @path = @path2;
2202        }
2203        elsif ( @path2 < 3 )
2204        {
2205            $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
2206            @path = @path1;
2207        }
2208        else
2209        {
2210            my $dist1 = newick_path_length( @path1 );
2211            my $dist2 = newick_path_length( @path2 );
2212            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2213            @path = ( $dist <= 0 ) ? @path1 : @path2;
2214            $dist = abs( $dist );
2215        }
2216    
2217        #  Descend tree until we reach the insertion branch:
2218    
2219        my $x;
2220        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
2221        {
2222            $dist -= $x;
2223            splice( @path, 0, 2 );
2224        }
2225    
2226        #  Insert the new node:
2227    
2228        set_newick_desc_i( $path[0], $path[1], [ [ $path[2], $subtree ], undef, $dist ] );
2229        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
2230    
2231        return $tree;
2232    }
2233    
2234    
2235    #-------------------------------------------------------------------------------
2236  #  Prune one or more tips from a tree:  #  Prune one or more tips from a tree:
2237  #     Caveat:  if one tip is listed, the original tree is modified.  #     Caveat:  if one tip is listed, the original tree is modified.
2238  #              if more than one tip is listed, a copy of the tree is returen  #              if more than one tip is listed, a copy of the tree is returned
2239  #                   (even if it is just listing the same tip twice!).  #                   (even if it is just listing the same tip twice!).
2240  #  #
2241  #  $newtree = prune_from_newick( $tree,  $tip  )  #  $newtree = prune_from_newick( $tree,  $tip  )
# Line 1842  Line 2391 
2391  #  Tree writing and reading  #  Tree writing and reading
2392  #  #
2393  #===============================================================================  #===============================================================================
2394  #  writeNewickTree( $tree [, $file ] )  #  writeNewickTree( $tree )
2395    #  writeNewickTree( $tree, $file )
2396    #  writeNewickTree( $tree, \*FH )
2397  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2398  sub writeNewickTree {  sub writeNewickTree {
2399      my ( $tree, $file ) = @_;      my ( $tree, $file ) = @_;
2400      $file || ( $file = *STDOUT );      my ( $fh, $close ) = open_output( $file );
2401      print  $file  ( strNewickTree( $tree ), "\n" );      $fh or return;
2402        print  $fh  ( strNewickTree( $tree ), "\n" );
2403        close $fh if $close;
2404  }  }
2405    
2406    
# Line 1990  Line 2543 
2543    
2544    
2545  #===============================================================================  #===============================================================================
2546    #  $tree  = read_newick_tree( $file )  # reads to a semicolon
2547    #  @trees = read_newick_trees( $file ) # reads to end of file
2548    #===============================================================================
2549    
2550    sub read_newick_tree
2551    {
2552        my $file = shift;
2553        my ( $fh, $close ) = open_input( $file );
2554        my $tree;
2555        my @lines = ();
2556        while ( defined( $_ = <$fh> ) )
2557        {
2558            chomp;
2559            push @lines, $_;
2560            if ( /;/ )
2561            {
2562                $tree = parse_newick_tree_str( join( ' ', @lines ) );
2563                last;
2564            }
2565        }
2566        close $fh if $close;
2567    
2568        $tree;
2569    }
2570    
2571    
2572    sub read_newick_trees
2573    {
2574        my $file = shift;
2575        my ( $fh, $close ) = open_input( $file );
2576        my @trees = ();
2577        my @lines = ();
2578        while ( defined( $_ = <$fh> ) )
2579        {
2580            chomp;
2581            push @lines, $_;
2582            if ( /;/ )
2583            {
2584                push @trees, parse_newick_tree_str( join( ' ', @lines ) );
2585                @lines = ()
2586            }
2587        }
2588        close $fh if $close;
2589    
2590        @trees;
2591    }
2592    
2593    
2594    #===============================================================================
2595  #  Tree reader adapted from the C language reader in fastDNAml  #  Tree reader adapted from the C language reader in fastDNAml
2596  #  #
2597  #  $tree = parse_newick_tree_str( $string )  #  $tree = parse_newick_tree_str( $string )
# Line 2191  Line 2793 
2793  #  Make a printer plot of a tree:  #  Make a printer plot of a tree:
2794  #  #
2795  #     $node   newick tree root node  #     $node   newick tree root node
2796  #     $file   undef (= *STDOUT), *STDOUT, *STDERR, or a file name.  #     $file   undef (= \*STDOUT), \*STDOUT, \*STDERR, or a file name.
2797  #     $width  the approximate characters for the tree without labels  #     $width  the approximate characters for the tree without labels
2798  #     $min_dx the minimum horizontal branch length  #     $min_dx the minimum horizontal branch length
2799  #     $dy     the vertical space per taxon  #     $dy     the vertical space per taxon
2800  #  #
2801  #  printer_plot_newick( $node, $file (D=*STDOUT), $width (D=68), $min_dx (D=2), $dy (D=1) )  #  printer_plot_newick( $node, $file (D=\*STDOUT), $width (D=68), $min_dx (D=2), $dy (D=1) )
2802  #===============================================================================  #===============================================================================
2803  sub printer_plot_newick {  sub printer_plot_newick {
2804      my ( $node, $file, $width, $min_dx, $dy ) = @_;      my ( $node, $file, $width, $min_dx, $dy ) = @_;
2805    
2806      my ( $fh, $close );      my ( $fh, $close ) = open_output( $file );
2807      if ( ! defined( $file ) ) {      $fh or return;
         $fh = *STDOUT;  
     }  
     elsif ( $file =~ /^\*/ ) {  
         $fh = $file;  
     }  
     else {  
         open $fh, ">$file" or die "Could not open $file for writing printer_plot_newick\n";  
         $close = 1;  
     }  
2808    
2809      print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";      print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";
2810      if ( $close ) { close $fh }      if ( $close ) { close $fh }
# Line 2237  Line 2830 
2830    
2831      $min_dx = int( $min_dx );      $min_dx = int( $min_dx );
2832      $dy     = int( $dy );      $dy     = int( $dy );
2833      # RAE: Had an error:      my $x_scale = $width / ( newick_max_X( $node ) || 1 );  # Div by zero caught by RAE
     # Illegal division by zero at /disks/space0/fig/FIGdisk.dev/dist/releases/ross/linux-gentoo/lib/FigKernelPackages/gjonewicklib.pm line 2240, <TREE> line 4., referer: http://listeria.uchicago.edu/dev/FIG/protein.cgi?prot=fig%7C220341.1.peg.3792&sims=1&fid=fig%7C220341.1.peg.3792&user=&translate=0&maxN=500&max_expand=5&maxP=1e-05&select=figx&sort_by=bits&group_by_genome=1&resubmit=resubmit  
     # therefore set newick_max_X to 1 if it is not defined  
   
     my $x_scale = $width / (newick_max_X( $node ) or 1);  
     #my $x_scale = $width / newick_max_X( $node );  
2834    
2835      my $hash = {};      my $hash = {};
2836      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );
# Line 2383  Line 2971 
2971  }  }
2972    
2973    
2974    #===============================================================================
2975    #  Open an input file stream:
2976    #
2977    #     ( $handle, undef ) = open_input(       );  # \*STDIN
2978    #     ( $handle, undef ) = open_input( \*FH  );
2979    #     ( $handle, 1     ) = open_input( $file );  # need to close $handle
2980    #
2981    #===============================================================================
2982    sub open_input
2983    {
2984        my $file = shift;
2985        my $fh;
2986        if    ( ! defined( $file ) )     { return ( \*STDIN ) }
2987        elsif ( ref( $file ) eq 'GLOB' ) { return ( $file   ) }
2988        elsif ( open( $fh, "<$file" ) )  { return ( $fh, 1  ) } # Need to close
2989    
2990        print STDERR "gjonewick::open_input could not open '$file' for reading\n";
2991        return undef;
2992    }
2993    
2994    
2995    #===============================================================================
2996    #  Open an output file stream:
2997    #
2998    #     ( $handle, undef ) = open_output(      );  # \*STDOUT
2999    #     ( $handle, undef ) = open_output( \*FH );
3000    #     ( $handle, 1     ) = open_output( $file ); # need to close $handle
3001    #
3002    #===============================================================================
3003    sub open_output
3004    {
3005        my $file = shift;
3006        my $fh;
3007        if    ( ! defined( $file ) )     { return ( \*STDOUT ) }
3008        elsif ( ref( $file ) eq 'GLOB' ) { return ( $file    ) }
3009        elsif ( ( open $fh, ">$file" ) ) { return ( $fh, 1   ) } # Need to close
3010    
3011        print STDERR "gjonewick::open_output could not open '$file' for writing\n";
3012        return undef;
3013    }
3014    
3015  1;  1;

Legend:
Removed from v.1.3  
changed lines
  Added in v.1.8

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3