[Bio] / FigKernelPackages / gjonewicklib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjonewicklib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.8, Sun Feb 11 18:25:48 2007 UTC revision 1.25, Sun Oct 3 23:03:57 2010 UTC
# Line 1  Line 1 
1    # This is a SAS component.
2    
3  #  #
4  # Copyright (c) 2003-2007 University of Chicago and Fellowship  # Copyright (c) 2003-2010 University of Chicago and Fellowship
5  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
6  #  #
7  # This file is part of the SEED Toolkit.  # This file is part of the SEED Toolkit.
# Line 20  Line 22 
22  #===============================================================================  #===============================================================================
23  #  perl functions for dealing with trees  #  perl functions for dealing with trees
24  #  #
25  #  Usage:  #  Usage:  use gjonewicklib
 #      use gjonewicklib  
 #  
26  #  #
27  #===============================================================================  #===============================================================================
28  #  Tree data structures:  #  Tree data structures:
# Line 77  Line 77 
77  #  Tree format interconversion:  #  Tree format interconversion:
78  #===============================================================================  #===============================================================================
79  #  #
80    #  $bool      = is_overbeek_tree( $tree )
81    #  $bool      = is_gjonewick_tree( $tree )
82    #
83  #  $gjonewick = overbeek_to_gjonewick( $overbeek )  #  $gjonewick = overbeek_to_gjonewick( $overbeek )
84  #  $overbeek  = gjonewick_to_overbeek( $gjonewick )  #  $overbeek  = gjonewick_to_overbeek( $gjonewick )
85  #  #
# Line 96  Line 99 
99  #  @desclist = newick_desc_list( $noderef )  #  @desclist = newick_desc_list( $noderef )
100  #  $n        = newick_n_desc( $noderef )  #  $n        = newick_n_desc( $noderef )
101  #  $descref  = newick_desc_i( $noderef, $i )    # 1-based numbering  #  $descref  = newick_desc_i( $noderef, $i )    # 1-based numbering
102  #  $bool     = newick_is_tip( $noderef )  #
103    #  $bool     = node_is_tip( $noderef )
104    #  $bool     = node_is_valid( $noderef )
105    #  $bool     = node_has_lbl( $noderef )
106    #  $bool     = node_lbl_is( $noderef, $label )
107  #  #
108  #  set_newick_desc_ref( $noderef, $listref )  #  set_newick_desc_ref( $noderef, $listref )
109  #  set_newick_lbl( $noderef, $label )  #  set_newick_lbl( $noderef, $label )
# Line 107  Line 114 
114  #  set_newick_c4( $noderef, $listref )  #  set_newick_c4( $noderef, $listref )
115  #  set_newick_c5( $noderef, $listref )  #  set_newick_c5( $noderef, $listref )
116  #  set_newick_desc_list( $noderef, @desclist )  #  set_newick_desc_list( $noderef, @desclist )
117  #  set_newick_desc_i( $noderef1, $i, $noderef2 )  #  set_newick_desc_i( $noderef1, $i, $noderef2 )  # 1-based numbering
118  #  #
119  #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid  #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid
120  #  #
# Line 118  Line 125 
125  #  #
126  #  $n       = newick_tip_count( $noderef )  #  $n       = newick_tip_count( $noderef )
127  #  @tiprefs = newick_tip_ref_list( $noderef )  #  @tiprefs = newick_tip_ref_list( $noderef )
128    # \@tiprefs = newick_tip_ref_list( $noderef )
129  #  @tips    = newick_tip_list( $noderef )  #  @tips    = newick_tip_list( $noderef )
130    # \@tips    = newick_tip_list( $noderef )
131    #
132  #  $tipref  = newick_first_tip_ref( $noderef )  #  $tipref  = newick_first_tip_ref( $noderef )
133  #  $tip     = newick_first_tip( $noderef )  #  $tip     = newick_first_tip( $noderef )
134    #
135  #  @tips    = newick_duplicated_tips( $noderef )  #  @tips    = newick_duplicated_tips( $noderef )
136    # \@tips    = newick_duplicated_tips( $noderef )
137    #
138  #  $bool    = newick_tip_in_tree( $noderef, $tipname )  #  $bool    = newick_tip_in_tree( $noderef, $tipname )
139    #
140  #  @tips    = newick_shared_tips( $tree1, $tree2 )  #  @tips    = newick_shared_tips( $tree1, $tree2 )
141    # \@tips    = newick_shared_tips( $tree1, $tree2 )
142  #  #
143  #  $length  = newick_tree_length( $noderef )  #  $length  = newick_tree_length( $noderef )
144    #
145    #  %tip_distances = newick_tip_distances( $noderef )
146    # \%tip_distances = newick_tip_distances( $noderef )
147    #
148  #  $xmax    = newick_max_X( $noderef )  #  $xmax    = newick_max_X( $noderef )
149  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )
150  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )  #  ( $tipname, $xmax ) = newick_most_distant_tip_name( $noderef )
151    #
152    #  Provide a standard name by which two trees can be compared for same topology
153    #
154    #  $stdname = std_tree_name( $tree )
155  #  #
156  #  Tree tip insertion point (tip is on branch of length x that  #  Tree tip insertion point (tip is on branch of length x that
157  #  is inserted into branch connecting node1 and node2, a distance  #  is inserted into branch connecting node1 and node2, a distance
158  #  x1 from node1 and x2 from node2):  #  x1 from node1 and x2 from node2):
159  #  #
160  #  [ $node1, $x1, $node2, $x2, $x ]  #  [ $node1, $x1, $node2, $x2, $x ] = newick_tip_insertion_point( $tree, $tip )
 #           = newick_tip_insertion_point( $tree, $tip )  
161  #  #
162  #  Standardized label for a node in terms of intersection of 3 lowest sorting  #  Standardized label for a node in terms of intersection of 3 lowest sorting
163  #  tips (sort is lower case):  #  tips (sort is lower case):
164  #  #
165  #  @TipOrTips = std_node_name( $Tree, $Node )  #  @TipOrTips = std_node_name( $tree, $node )
166  #  #
167  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
168  #  Paths from root of tree:  #  Paths from root of tree:
# Line 151  Line 173 
173  #      () is returned upon failure  #      () is returned upon failure
174  #  #
175  #  @path = path_to_tip( $treenode, $tipname )  #  @path = path_to_tip( $treenode, $tipname )
176    # \%paths = paths_to_tips( $treenode, \@%tips )
177  #  @path = path_to_named_node( $treenode, $nodename )  #  @path = path_to_named_node( $treenode, $nodename )
178    # \%paths = paths_to_named_nodes( $treenode, \@names )
179  #  @path = path_to_node_ref( $treenode, $noderef )  #  @path = path_to_node_ref( $treenode, $noderef )
180  #  #
181  #  @path = path_to_node( $node,   $tip1, $tip2, $tip3   )  #  3 tip names  #  @path  = path_to_node( $node,   $name1, $name2, $name3   )  #  3 node names
182  #  @path = path_to_node( $node, [ $tip1, $tip2, $tip3 ] )  #  Array of tips  #  @path  = path_to_node( $node, [ $name1, $name2, $name3 ] )  #  Array of names
183  #  @path = path_to_node( $node,   $tip1                 )  #  Use path_to_tip  #  @path  = path_to_node( $node,   $name1, $name2   )          #  2 node names
184  #  @path = path_to_node( $node, [ $tip1 ]               )  #  Use path_to_tip  #  @path  = path_to_node( $node, [ $name1, $name2 ] )          #  Array of names
185    #  @path  = path_to_node( $node,   $name1   )                  #  1 node name
186    #  @path  = path_to_node( $node, [ $name1 ] )                  #  Array with name
187  #  #
188  #  $distance = newick_path_length( @path )  #  $distance = newick_path_length( @path )
189  #  $distance = tip_to_tip_distance( $tree, $tip1, $tip2 )  #  $distance = tip_to_tip_distance( $tree, $tip1, $tip2 )
# Line 189  Line 215 
215  #  $n_changed = newick_set_all_branches( $node, $x )  #  $n_changed = newick_set_all_branches( $node, $x )
216  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
217  #  $node      = newick_rescale_branches( $node, $factor )  #  $node      = newick_rescale_branches( $node, $factor )
218    #  $node      = newick_random_branch_lengths( $node, $x1, $x2 )
219    #  $node      = newick_modify_branches( $node, \&function )
220    #  $node      = newick_modify_branches( $node, \&function, \@func_parms )
221  #  #
222  #  Modify comments:  #  Modify comments:
223  #  #
# Line 201  Line 230 
230  #  $stdtree = std_unrooted_newick( $tree )  #  $stdtree = std_unrooted_newick( $tree )
231  #  $newtree = aesthetic_newick_tree( $tree, $direction )  #  $newtree = aesthetic_newick_tree( $tree, $direction )
232  #  $rndtree = random_order_newick_tree( $tree )  #  $rndtree = random_order_newick_tree( $tree )
233    #  $newtree - reroot_tree( $tree, \%options )
234  #  $newtree = reroot_newick_by_path( @path )  #  $newtree = reroot_newick_by_path( @path )
235  #  $newtree = reroot_newick_to_tip( $tree, $tip )  #  $newtree = reroot_newick_to_tip( $tree, $tip )
236  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )
237  #  $newtree = reroot_newick_to_node( $tree, @node )  #  $newtree = reroot_newick_to_node( $tree, @node )
238  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
239  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
240    #  $newtree = reroot_newick_at_dist_between_nodes( $tree, $node1, $node2, $distance )
241    #  $newtree = reroot_newick_to_midpoint( $tree )           # unweighted
242    #  $newtree = reroot_newick_to_midpoint_w( $tree )         # weight by tips
243  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted
244  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips
245  #  $newtree = uproot_tip_rooted_newick( $tree )  #  $newtree = uproot_tip_rooted_newick( $tree )
246  #  $newtree = uproot_newick( $tree )  #  $newtree = uproot_newick( $tree )
247  #  #
248  #  $newtree = prune_from_newick( $tree, $tip )  #  $newtree = prune_from_newick( $tree, $tip )
249    #  $newtree = rooted_newick_subtree( $tree,  @tips )
250    #  $newtree = rooted_newick_subtree( $tree, \@tips )
251  #  $newtree = newick_subtree( $tree,  @tips )  #  $newtree = newick_subtree( $tree,  @tips )
252  #  $newtree = newick_subtree( $tree, \@tips )  #  $newtree = newick_subtree( $tree, \@tips )
253    #  $newtree = newick_covering_subtree( $tree,  @tips )
254    #  $newtree = newick_covering_subtree( $tree, \@tips )
255  #  #
256  #  $newtree = collapse_zero_length_branches( $tree )  #  $newtree = collapse_zero_length_branches( $tree )
257  #  #
# Line 222  Line 259 
259  #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )  #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
260  #  #
261  #===============================================================================  #===============================================================================
262    #  Tree neighborhood: subtree of n tips to represent a larger tree.
263    #===============================================================================
264    #
265    #  Focus around root:
266    #
267    #  $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
268    #  $subtree = root_neighborhood_representative_tree( $tree, $n )
269    #  @tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
270    #  @tips    = root_neighborhood_representative_tips( $tree, $n )
271    # \@tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
272    # \@tips    = root_neighborhood_representative_tips( $tree, $n )
273    #
274    #  Focus around a tip insertion point (the tip is not in the subtree):
275    #
276    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
277    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
278    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
279    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
280    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
281    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
282    #
283    #===============================================================================
284    #  Random trees
285    #===============================================================================
286    #
287    #   $tree = random_equibranch_tree(  @tips, \%options )
288    #   $tree = random_equibranch_tree( \@tips, \%options )
289    #   $tree = random_equibranch_tree(  @tips )
290    #   $tree = random_equibranch_tree( \@tips )
291    #
292    #   $tree = random_ultrametric_tree(  @tips, \%options )
293    #   $tree = random_ultrametric_tree( \@tips, \%options )
294    #   $tree = random_ultrametric_tree(  @tips )
295    #   $tree = random_ultrametric_tree( \@tips )
296    #
297    #===============================================================================
298  #  Tree reading and writing:  #  Tree reading and writing:
299  #===============================================================================  #===============================================================================
300    #  Write machine-readable trees:
301  #  #
302  #   writeNewickTree( $tree )  #   writeNewickTree( $tree )
303  #   writeNewickTree( $tree, $file )  #   writeNewickTree( $tree, $file )
# Line 231  Line 305 
305  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O
306  #  $treestring = swriteNewickTree( $tree )  #  $treestring = swriteNewickTree( $tree )
307  #  $treestring = formatNewickTree( $tree )  #  $treestring = formatNewickTree( $tree )
308    #
309    #  Write human-readable trees:
310    #
311  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )
312  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )
313  #  #
314    #  Read trees:
315    #
316  #  $tree  = read_newick_tree( $file )  # reads to a semicolon  #  $tree  = read_newick_tree( $file )  # reads to a semicolon
317  #  @trees = read_newick_trees( $file ) # reads to end of file  #  @trees = read_newick_trees( $file ) # reads to end of file
318  #  $tree  = parse_newick_tree_str( $string )  #  $tree  = parse_newick_tree_str( $string )
# Line 243  Line 322 
322    
323  use Carp;  use Carp;
324  use Data::Dumper;  use Data::Dumper;
325    use strict;
326    
327  require Exporter;  require Exporter;
328    
329  our @ISA = qw(Exporter);  our @ISA = qw(Exporter);
330  our @EXPORT = qw(  our @EXPORT = qw(
331            is_overbeek_tree
332            is_gjonewick_tree
333          overbeek_to_gjonewick          overbeek_to_gjonewick
334          gjonewick_to_overbeek          gjonewick_to_overbeek
   
335          newick_is_valid          newick_is_valid
336          newick_is_rooted          newick_is_rooted
337          newick_is_unrooted          newick_is_unrooted
338          tree_rooted_on_tip          tree_rooted_on_tip
339          newick_is_bifurcating          newick_is_bifurcating
340          newick_tip_count          newick_tip_count
341            newick_tip_ref_list
342          newick_tip_list          newick_tip_list
343    
344          newick_first_tip          newick_first_tip
345          newick_duplicated_tips          newick_duplicated_tips
346          newick_tip_in_tree          newick_tip_in_tree
347          newick_shared_tips          newick_shared_tips
348    
349          newick_tree_length          newick_tree_length
350            newick_tip_distances
351          newick_max_X          newick_max_X
352          newick_most_distant_tip_ref          newick_most_distant_tip_ref
353          newick_most_distant_tip_name          newick_most_distant_tip_name
354    
355          std_newick_name          newick_tip_insertion_point
356    
357            std_tree_name
358    
359          path_to_tip          path_to_tip
360          path_to_named_node          path_to_named_node
# Line 290  Line 376 
376          newick_set_all_branches          newick_set_all_branches
377          newick_fix_negative_branches          newick_fix_negative_branches
378          newick_rescale_branches          newick_rescale_branches
379            newick_random_branch_lengths
380            newick_modify_branches
381    
382          newick_strip_comments          newick_strip_comments
383    
# Line 300  Line 388 
388          unaesthetic_newick_tree          unaesthetic_newick_tree
389          random_order_newick_tree          random_order_newick_tree
390    
391            reroot_tree
392          reroot_newick_by_path          reroot_newick_by_path
393          reroot_newick_to_tip          reroot_newick_to_tip
394          reroot_newick_next_to_tip          reroot_newick_next_to_tip
395          reroot_newick_to_node          reroot_newick_to_node
396          reroot_newick_to_node_ref          reroot_newick_to_node_ref
397            reroot_newick_between_nodes
398            reroot_newick_at_dist_between_nodes
399            reroot_newick_to_midpoint
400            reroot_newick_to_midpoint_w
401          reroot_newick_to_approx_midpoint          reroot_newick_to_approx_midpoint
402          reroot_newick_to_approx_midpoint_w          reroot_newick_to_approx_midpoint_w
403          uproot_tip_rooted_newick          uproot_tip_rooted_newick
404          uproot_newick          uproot_newick
405    
406          prune_from_newick          prune_from_newick
407            rooted_newick_subtree
408          newick_subtree          newick_subtree
409            newick_covering_subtree
410          collapse_zero_length_branches          collapse_zero_length_branches
411    
412          newick_insert_at_node          newick_insert_at_node
413          newick_insert_between_nodes          newick_insert_between_nodes
414    
415            root_neighborhood_representative_tree
416            root_neighborhood_representative_tips
417            tip_neighborhood_representative_tree
418            tip_neighborhood_representative_tips
419    
420            random_equibranch_tree
421            random_ultrametric_tree
422    
423          writeNewickTree          writeNewickTree
424          fwriteNewickTree          fwriteNewickTree
425          strNewickTree          strNewickTree
# Line 342  Line 445 
445          newick_desc_list          newick_desc_list
446          newick_n_desc          newick_n_desc
447          newick_desc_i          newick_desc_i
448          newick_is_tip  
449          newick_is_valid          node_is_tip
450            node_is_valid
451            node_has_lbl
452            node_lbl_is
453    
454          set_newick_desc_ref          set_newick_desc_ref
455          set_newick_lbl          set_newick_lbl
# Line 362  Line 468 
468          );          );
469    
470    
 use gjolists qw(  
         common_prefix  
         unique_suffixes  
   
         duplicates  
         random_order  
   
         intersection  
         set_difference  
         );  
   
   
 use strict;  
   
   
471  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
472  #  Internally used definitions  #  Internally used definitions
473  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
474    
475  sub array_ref { ref( $_[0] ) eq "ARRAY" }  sub array_ref { $_[0] && ref( $_[0] ) eq 'ARRAY' }
476  sub hash_ref  { ref( $_[0] ) eq "HASH"  }  sub hash_ref  { $_[0] && ref( $_[0] ) eq 'HASH'  }
477    
478    sub max       { $_[0] >= $_[1] ? $_[0] : $_[1] }
479    sub min       { $_[0] <= $_[1] ? $_[0] : $_[1] }
480    
481    
482  #===============================================================================  #===============================================================================
483  #  Interconvert Overbeek and gjonewick trees:  #  Interconvert overbeek and gjonewick trees:
484  #===============================================================================  #===============================================================================
485    
486    sub is_overbeek_tree  { array_ref( $_[0] ) && array_ref( $_[0]->[2] ) }
487    
488    sub is_gjonewick_tree { array_ref( $_[0] ) && array_ref( $_[0]->[0] ) }
489    
490  sub overbeek_to_gjonewick  sub overbeek_to_gjonewick
491  {  {
492      return () unless ref( $_[0] ) eq 'ARRAY';      return () unless ref( $_[0] ) eq 'ARRAY';
# Line 408  Line 506 
506      return $node;      return $node;
507  }  }
508    
509    
510  #===============================================================================  #===============================================================================
511  #  Extract tree structure values:  #  Extract tree structure values:
512  #===============================================================================  #===============================================================================
# Line 423  Line 522 
522  #     @list    = newick_desc_list( $noderef )  #     @list    = newick_desc_list( $noderef )
523  #     $int     = newick_n_desc( $noderef )  #     $int     = newick_n_desc( $noderef )
524  #     $listref = newick_desc_i( $noderef )  #     $listref = newick_desc_i( $noderef )
525    #
526  #     $bool    = node_is_tip( $noderef )  #     $bool    = node_is_tip( $noderef )
527  #     $bool    = node_is_valid( $noderef )  #     $bool    = node_is_valid( $noderef )
528    #     $bool    = node_has_lbl( $noderef )
529    #     $bool    = node_lbl_is( $noderef, $label )
530  #  #
531  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
532    
533  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]  sub newick_desc_ref { ref($_[0]) ? $_[0]->[0] : Carp::confess() }
534  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }
535  sub newick_x        { $_[0]->[2] }  sub newick_x        { ref($_[0]) ? $_[0]->[2] : Carp::confess() }
536  sub newick_c1       { $_[0]->[3] }  sub newick_c1       { ref($_[0]) ? $_[0]->[3] : Carp::confess() }
537  sub newick_c2       { $_[0]->[4] }  sub newick_c2       { ref($_[0]) ? $_[0]->[4] : Carp::confess() }
538  sub newick_c3       { $_[0]->[5] }  sub newick_c3       { ref($_[0]) ? $_[0]->[5] : Carp::confess() }
539  sub newick_c4       { $_[0]->[6] }  sub newick_c4       { ref($_[0]) ? $_[0]->[6] : Carp::confess() }
540  sub newick_c5       { $_[0]->[7] }  sub newick_c5       { ref($_[0]) ? $_[0]->[7] : Carp::confess() }
541    
542  sub newick_desc_list {  sub newick_desc_list
543      my $node = $_[0];  {
544      ! array_ref( $node      ) ? undef           :      local $_ = $_[0];
545        array_ref( $node->[0] ) ? @{ $node->[0] } :      array_ref( $_ ) && array_ref( $_->[0] ) ? @{ $_->[0] } : ();
546                                  ()              ;  }
547  }  
548    sub newick_n_desc
549  sub newick_n_desc {  {
550      my $node = $_[0];      local $_ = $_[0];
551      ! array_ref( $node      ) ? undef                  :      array_ref( $_ ) && array_ref( $_->[0] ) ? scalar @{ $_->[0] } : 0;
552        array_ref( $node->[0] ) ? scalar @{ $node->[0] } :  }
553                                  0                      ;  
554  }  sub newick_desc_i
555    {
556  sub newick_desc_i {      local $_ = $_[0];
557      my ( $node, $i ) = @_;      my    $i = $_[1];
558      ! array_ref( $node      ) ? undef              :      array_ref( $_ ) && $i && array_ref( $_->[0] ) ? $_->[0]->[$i-1] : undef;
559        array_ref( $node->[0] ) ? $node->[0]->[$i-1] :  }
560                                  undef              ;  
561  }  sub node_is_tip
562    {
563  sub node_is_tip {      local $_ = $_[0];
564      my $node = $_[0];      ! array_ref( $_ )      ? undef             :  # Not a node ref
565      ! array_ref( $node      ) ? undef                :  # Not a node ref        array_ref( $_->[0] ) ? @{ $_->[0] } == 0 :  # Empty descend list?
       array_ref( $node->[0] ) ? @{ $node->[0] } == 0 :  # Empty descend list?  
566                                  1                    ;  # No descend list                                  1                    ;  # No descend list
567  }  }
568    
569  sub node_is_valid {      #  An array ref with nonempty descend list or a label  sub node_is_valid      #  An array ref with nonempty descend list or a label
570      my $node = $_[0];  {
571      array_ref( $node ) && (  array_ref( $node->[0] ) && @{ $node->[0] }      local $_ = $_[0];
572                            || defined( $node->[1] )      array_ref( $_ ) && ( array_ref( $_->[0] ) && @{ $_->[0] } || defined( $_->[1] ) )
                           )  
573  }  }
574    
575    sub node_has_lbl { local $_ = $_[0]->[1]; defined( $_ ) && ( $_ ne '' ) }
576    
577    sub node_lbl_is { local $_ = $_[0]->[1]; defined( $_ ) && ( $_ eq $_[1] ) }
578    
579    
580  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
581  #  Set tree structure values  #  Set tree structure values
# Line 486  Line 590 
590  sub set_newick_c4       { $_[0]->[6] = $_[1] }  sub set_newick_c4       { $_[0]->[6] = $_[1] }
591  sub set_newick_c5       { $_[0]->[7] = $_[1] }  sub set_newick_c5       { $_[0]->[7] = $_[1] }
592    
593  sub set_newick_desc_list {  sub set_newick_desc_list
594      my $node = shift;  {
595      array_ref( $node ) || return;      local $_ = shift;
596      if ( array_ref( $node->[0] ) ) { @{ $node->[0] } =   @_   }      array_ref( $_ ) || return;
597      else                           {    $node->[0]   = [ @_ ] }      if ( array_ref( $_->[0] ) ) { @{ $_->[0] } =   @_   }
598        else                        {    $_->[0]   = [ @_ ] }
599  }  }
600    
601  sub set_newick_desc_i {  sub set_newick_desc_i
602    {
603      my ( $node1, $i, $node2 ) = @_;      my ( $node1, $i, $node2 ) = @_;
604      array_ref( $node1 ) && array_ref( $node2 ) || return;      array_ref( $node1 ) && array_ref( $node2 ) || return;
605      if ( array_ref( $node1->[0] ) ) { $node1->[0]->[$i-1] =   $node2   }      if ( array_ref( $node1->[0] ) ) { $node1->[0]->[$i-1] =   $node2   }
# Line 560  Line 666 
666  #  #
667  #  $bool = newick_is_rooted( $node )  #  $bool = newick_is_rooted( $node )
668  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
669  sub newick_is_rooted {  sub newick_is_rooted
670      my $node = $_[0];  {
671      ! array_ref( $node      ) ? undef                :  # Not a node ref      local $_ = $_[0];
672        array_ref( $node->[0] ) ? @{ $node->[0] } == 2 :  # 2 branches      ! array_ref( $_      ) ? undef             :  # Not a node ref
673          array_ref( $_->[0] ) ? @{ $_->[0] } == 2 :  # 2 branches
674                                  0                    ;  # No descend list                                  0                    ;  # No descend list
675  }  }
676    
# Line 573  Line 680 
680  #  #
681  #  $bool = newick_is_unrooted( $node )  #  $bool = newick_is_unrooted( $node )
682  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
683  sub newick_is_unrooted {  sub newick_is_unrooted
684      my $node = $_[0];  {
685      ! array_ref( $node      ) ? undef                :  # Not a node ref      local $_ = $_[0];
686        array_ref( $node->[0] ) ? @{ $node->[0] } >= 3 :  # Over 2 branches      ! array_ref( $_      ) ? undef             :  # Not a node ref
687          array_ref( $_->[0] ) ? @{ $_->[0] } >= 3 :  # Over 2 branches
688                                  0                    ;  # No descend list                                  0                    ;  # No descend list
689  }  }
690    
# Line 586  Line 694 
694  #  #
695  #  $bool = newick_is_tip_rooted( $node )  #  $bool = newick_is_tip_rooted( $node )
696  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
697  sub newick_is_tip_rooted {  sub newick_is_tip_rooted
698      my $node = $_[0];  {
699      ! array_ref( $node      ) ? undef                :  # Not a node ref      local $_ = $_[0];
700        array_ref( $node->[0] ) ? @{ $node->[0] } == 1 :  # 1 branch      ! array_ref( $_      ) ? undef             :  # Not a node ref
701          array_ref( $_->[0] ) ? @{ $_->[0] } == 1 :  # 1 branch
702                                  0                    ;  # No descend list                                  0                    ;  # No descend list
703  }  }
704    
# Line 601  Line 710 
710  #  #
711  #  $n_desc = newick_is_bifurcating( $node )  #  $n_desc = newick_is_bifurcating( $node )
712  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
713  sub newick_is_bifurcating {  sub newick_is_bifurcating
714    {
715      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
716      if ( ! array_ref( $node ) ) { return undef }    #  Bad arg      if ( ! array_ref( $node ) ) { return undef }    #  Bad arg
717    
# Line 622  Line 732 
732  #  #
733  #  $n = newick_tip_count( $node )  #  $n = newick_tip_count( $node )
734  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
735  sub newick_tip_count {  sub newick_tip_count
736    {
737      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
738    
739      my $imax = newick_n_desc( $node );      my $imax = newick_n_desc( $node );
# Line 641  Line 752 
752  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
753  #  List of tip nodes:  #  List of tip nodes:
754  #  #
755  #  @tips = newick_tip_ref_list( $node )  #  @tips = newick_tip_ref_list( $noderef )
756    # \@tips = newick_tip_ref_list( $noderef )
757  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
758  sub newick_tip_ref_list {  sub newick_tip_ref_list
759    {
760      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
761    
762      my $imax = newick_n_desc( $node );      my $imax = newick_n_desc( $node );
# Line 652  Line 765 
765      my @list = ();      my @list = ();
766    
767      #  Tree rooted on tip?      #  Tree rooted on tip?
768      if ( $imax == 1 && ! $not_root && newick_lbl( $node ) ) { push @list, $node }      if ( ! $not_root && ( $imax == 1 ) && node_has_lbl( $node ) ) { push @list, $node }
769    
770      foreach ( newick_desc_list( $node ) ) {      foreach ( newick_desc_list( $node ) ) {
771          push @list, newick_tip_ref_list( $_, 1 );          push @list, newick_tip_ref_list( $_, 1 );
772      }      }
773    
774      @list;      wantarray ? @list : \@list;
775  }  }
776    
777    
# Line 666  Line 779 
779  #  List of tips:  #  List of tips:
780  #  #
781  #  @tips = newick_tip_list( $node )  #  @tips = newick_tip_list( $node )
782    # \@tips = newick_tip_list( $node )
783  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
784  sub newick_tip_list {  sub newick_tip_list
785      map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );  {
786        my @tips = map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );
787        wantarray ? @tips : \@tips;
788  }  }
789    
790    
# Line 677  Line 793 
793  #  #
794  #  $tipref = newick_first_tip_ref( $node )  #  $tipref = newick_first_tip_ref( $node )
795  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
796  sub newick_first_tip_ref {  sub newick_first_tip_ref
797    {
798      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
799      valid_node( $node ) || return  undef;      valid_node( $node ) || return  undef;
800    
# Line 694  Line 811 
811  #  #
812  #  $tip = newick_first_tip( $node )  #  $tip = newick_first_tip( $node )
813  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
814  sub newick_first_tip {  sub newick_first_tip
815    {
816      my ( $noderef ) = @_;      my ( $noderef ) = @_;
817    
818      my $tipref;      my $tipref;
# Line 707  Line 825 
825  #  List of duplicated tip labels.  #  List of duplicated tip labels.
826  #  #
827  #  @tips = newick_duplicated_tips( $node )  #  @tips = newick_duplicated_tips( $node )
828    # \@tips = newick_duplicated_tips( $node )
829  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
830  sub newick_duplicated_tips {  sub newick_duplicated_tips
831      gjolists::duplicates( newick_tip_list( $_[0] ) );  {
832        my @tips = &duplicates( newick_tip_list( $_[0] ) );
833        wantarray ? @tips : \@tips;
834  }  }
835    
836    
# Line 718  Line 839 
839  #  #
840  #  $bool = newick_tip_in_tree( $node, $tipname )  #  $bool = newick_tip_in_tree( $node, $tipname )
841  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
842  sub newick_tip_in_tree {  sub newick_tip_in_tree
843    {
844      my ( $node, $tip, $not_root ) = @_;      my ( $node, $tip, $not_root ) = @_;
845    
846      my $n = newick_n_desc( $node );      my $n = newick_n_desc( $node );
847      if ( $n < 1 ) { return ( newick_lbl( $node ) eq $tip) ? 1 : 0 }      if ( $n < 1 ) { return node_lbl_is( $node, $tip ) ? 1 : 0 }
848    
849      #  Special case for tree rooted on tip      #  Special case for tree rooted on tip
850    
851      if ( $n == 1 && ( ! $not_root ) && newick_lbl( $node ) eq $tip ) { return 1 }      if ( ( $n == 1 ) && ( ! $not_root ) && node_lbl_is( $node, $tip ) )
852        {
853            return 1
854        }
855    
856      foreach ( newick_desc_list( $node ) ) {      foreach ( newick_desc_list( $node ) ) {
857          if ( newick_tip_in_tree( $_, $tip, 1 ) ) { return 1 }          if ( newick_tip_in_tree( $_, $tip, 1 ) ) { return 1 }
# Line 740  Line 865 
865  #  Tips shared between 2 trees.  #  Tips shared between 2 trees.
866  #  #
867  #  @tips = newick_shared_tips( $tree1, $tree2 )  #  @tips = newick_shared_tips( $tree1, $tree2 )
868    # \@tips = newick_shared_tips( $tree1, $tree2 )
869  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
870  sub newick_shared_tips {  sub newick_shared_tips
871      my ( $Tree1, $Tree2 ) = @_;  {
872      my ( @Tips1 ) = newick_tip_list( $Tree1 );      my ( $tree1, $tree2 ) = @_;
873      my ( @Tips2 ) = newick_tip_list( $Tree2 );      my $tips1 = newick_tip_list( $tree1 );
874      gjolists::intersection( \@Tips1, \@Tips2 );      my $tips2 = newick_tip_list( $tree2 );
875        my @tips = &intersection( $tips1, $tips2 );
876        wantarray ? @tips : \@tips;
877  }  }
878    
879    
# Line 754  Line 882 
882  #  #
883  #  $length = newick_tree_length( $node )  #  $length = newick_tree_length( $node )
884  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
885  sub newick_tree_length {  sub newick_tree_length
886    {
887      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
888    
889      my $x = $not_root ? newick_x( $node ) : 0;      my $x = $not_root ? newick_x( $node ) : 0;
# Line 767  Line 896 
896    
897    
898  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
899    #  Hash of tip nodes and corresponding distances from root:
900    #
901    #   %tip_distances = newick_tip_distances( $node )
902    #  \%tip_distances = newick_tip_distances( $node )
903    #-------------------------------------------------------------------------------
904    sub newick_tip_distances
905    {
906        my ( $node, $x, $hash ) = @_;
907        my $root = ! $hash;
908        ref( $hash ) eq 'HASH' or $hash = {};
909    
910        $x ||= 0;
911        $x  += newick_x( $node ) || 0;
912    
913        #  Is it a tip?
914    
915        my $n_desc = newick_n_desc( $node );
916        if ( ! $n_desc )
917        {
918            $hash->{ newick_lbl( $node ) } = $x;
919            return $hash;
920        }
921    
922        #  Tree rooted on tip?
923    
924        if ( $root && ( $n_desc == 1 ) && node_has_lbl( $node ) )
925        {
926            $hash->{ newick_lbl( $node ) } = 0;  # Distance to root is zero
927        }
928    
929        foreach ( newick_desc_list( $node ) ) { newick_tip_distances( $_, $x, $hash ) }
930    
931        wantarray ? %$hash : $hash;
932    }
933    
934    
935    #-------------------------------------------------------------------------------
936  #  Tree max X.  #  Tree max X.
937  #  #
938  #  $xmax = newick_max_X( $node )  #  $xmax = newick_max_X( $node )
939  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
940  sub newick_max_X {  sub newick_max_X
941    {
942      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
943    
944      my $xmax = 0;      my $xmax = 0;
# Line 790  Line 957 
957  #  #
958  #  ( $xmax, @path ) = newick_most_distant_tip_path( $tree )  #  ( $xmax, @path ) = newick_most_distant_tip_path( $tree )
959  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
960  sub newick_most_distant_tip_path {  sub newick_most_distant_tip_path
961    {
962      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
963    
964      my $imax = newick_n_desc( $node );      my $imax = newick_n_desc( $node );
# Line 812  Line 980 
980  #  #
981  #  ( $tipref, $xmax ) = newick_most_distant_tip_ref( $tree )  #  ( $tipref, $xmax ) = newick_most_distant_tip_ref( $tree )
982  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
983  sub newick_most_distant_tip_ref {  sub newick_most_distant_tip_ref
984    {
985      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
986    
987      my $imax = newick_n_desc( $node );      my $imax = newick_n_desc( $node );
# Line 834  Line 1003 
1003  #  #
1004  #  ( $tipname, $xmax ) = newick_most_distant_tip_name( $tree )  #  ( $tipname, $xmax ) = newick_most_distant_tip_name( $tree )
1005  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1006  sub newick_most_distant_tip_name {  sub newick_most_distant_tip_name
1007    {
1008      my ( $tipref, $xmax ) = newick_most_distant_tip_ref( $_[0] );      my ( $tipref, $xmax ) = newick_most_distant_tip_ref( $_[0] );
1009      ( newick_lbl( $tipref ), $xmax )      ( newick_lbl( $tipref ), $xmax )
1010  }  }
# Line 910  Line 1080 
1080    
1081      else      else
1082      {      {
1083          my ( $n1, $x1 ) = describe_desc( $dl->[0] );          my ( $n1, $x1 ) = describe_descendant( $dl->[0] );
1084          my ( $n2, $x2 ) = describe_desc( $dl->[1] );          my ( $n2, $x2 ) = describe_descendant( $dl->[1] );
1085    
1086          if ( @$n1 == 2 ) { push @$n1, $n2->[0] }          if ( @$n1 == 2 ) { push @$n1, $n2->[0] }
1087          if ( @$n2 == 2 )          if ( @$n2 == 2 )
# Line 926  Line 1096 
1096  }  }
1097    
1098    
1099  sub describe_desc  sub describe_descendant
1100  {  {
1101      my $node = shift;      my $node = shift;
1102    
# Line 942  Line 1112 
1112    
1113      #  Is it a tip?  Return list of one tip;      #  Is it a tip?  Return list of one tip;
1114    
1115      if ( ( ! $dl ) || @$dl == 0 )      if ( ( ! $dl ) || ! @$dl ) { return ( [ newick_lbl( $node ) ], $x ) }
     {  
         return ( [ newick_lbl( $node ) ], $x );  
     }  
1116    
1117      #  Get tips of each descendent, keeping lowest sorting from each.      #  Get tips of each descendent, keeping lowest sorting from each.
1118      #  Return the two lowest of those (the third will come from the      #  Return the two lowest of those (the third will come from the
1119      #  other side of the original node).      #  other side of the original node).
1120    
     else  
     {  
1121          my @rep_tips = sort { lc $a cmp lc $b }          my @rep_tips = sort { lc $a cmp lc $b }
1122                         map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }                         map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }
1123                         @$dl;                         @$dl;
1124          return ( [ @rep_tips[0,1] ], $x );          return ( [ @rep_tips[0,1] ], $x );
1125      }      }
 }  
1126    
1127    
1128  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 967  Line 1131 
1131  #     Three sorted tip labels intersecting at node, each being smallest  #     Three sorted tip labels intersecting at node, each being smallest
1132  #           of all the tips of their subtrees  #           of all the tips of their subtrees
1133  #  #
1134  #  @TipOrTips = std_node_name( $Tree, $Node )  #  @TipOrTips = std_node_name( $tree, $node )
1135  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1136  sub std_node_name {  sub std_node_name
1137    {
1138      my $tree = $_[0];      my $tree = $_[0];
1139    
1140      #  Node reference is last element of path to node      #  Node reference is last element of path to node
# Line 977  Line 1142 
1142      my $noderef = ( path_to_node( @_ ) )[-1];      my $noderef = ( path_to_node( @_ ) )[-1];
1143      defined( $noderef ) || return ();      defined( $noderef ) || return ();
1144    
1145      if ( node_is_tip( $noderef ) || $noderef eq $tree ) {  # Is it a tip?      if ( node_is_tip( $noderef ) || ( $noderef eq $tree ) ) {  # Is it a tip?
1146          return newick_lbl( $noderef );          return newick_lbl( $noderef );
1147      }      }
1148    
# Line 985  Line 1150 
1150      #  @rest, and keeping the best tip for each subtree.      #  @rest, and keeping the best tip for each subtree.
1151    
1152      my @rest = newick_tip_list( $tree );      my @rest = newick_tip_list( $tree );
1153      my @best = map {      my @best = map
1154              {
1155              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );
1156              @rest = gjolists::set_difference( \@rest, \@tips );              @rest = &set_difference( \@rest, \@tips );
1157              $tips[0];              $tips[0];
1158          } newick_desc_list( $noderef );          } newick_desc_list( $noderef );
1159    
# Line 1013  Line 1179 
1179  #  #
1180  #  @path = path_to_tip( $treenode, $tipname )  #  @path = path_to_tip( $treenode, $tipname )
1181  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1182  sub path_to_tip {  sub path_to_tip
1183      my ( $node, $tip, @path0 ) = @_;  {
1184        my ( $node, $tip ) = @_;
1185    
     push @path0, $node;  
1186      my $imax = newick_n_desc( $node );      my $imax = newick_n_desc( $node );
     if ( $imax < 1 ) { return ( newick_lbl( $node ) eq $tip ) ? @path0 : () }  
1187    
1188      #  Special case for tree rooted on tip      #  Tip (including root tip):
1189    
1190      if ( ( $imax  == 1 )                  #  One descendant      return ( $node ) if ( $imax < 2 ) && node_lbl_is( $node, $tip );
       && ( @path0 == 1 )                  #  First step in path  
       && ( newick_lbl( $node ) eq $tip )  #  Label matches  
        ) { return @path0 }  
1191    
     my @path;  
1192      for (my $i = 1; $i <= $imax; $i++ ) {      for (my $i = 1; $i <= $imax; $i++ ) {
1193         @path = path_to_tip( newick_desc_i( $node, $i ), $tip, ( @path0, $i ) );         my @suf = path_to_tip( newick_desc_i( $node, $i ), $tip );
1194         if ( @path ) { return @path }         return ( $node, $i, @suf ) if @suf;
1195      }      }
1196    
1197      ();  #  Not found      ();  #  Not found
# Line 1038  Line 1199 
1199    
1200    
1201  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1202  #  Path to named node.  #  Paths to tips:
1203  #  Like path to tip, but will find named internal nodes as well.  #
1204    #  \%paths = paths_to_tips( $treenode, \@tips )
1205    #  \%paths = paths_to_tips( $treenode, \%tips )
1206    #
1207    #-------------------------------------------------------------------------------
1208    sub paths_to_tips
1209    {
1210        my ( $node, $tips ) = @_;
1211        return {} if ! ( $tips && ref( $tips ) );
1212    
1213        #  Replace request for list with request by hash
1214    
1215        if ( ref( $tips ) eq 'ARRAY' ) { $tips = { map { $_ => 1 } @$tips } }
1216    
1217        my $paths = {};
1218        my $imax = newick_n_desc( $node );
1219        if ( $imax < 2 )
1220        {
1221            my $lbl;
1222            if ( node_has_lbl( $node ) && defined( $lbl = newick_lbl( $node ) ) && $tips->{ $lbl } )
1223            {
1224                delete $tips->{ $lbl };
1225                $paths->{ $lbl } = [ $node ];
1226            }
1227            return $paths if ! $imax;  # tip (no more to do it tested below)
1228        }
1229    
1230        for ( my $i = 1; $i <= $imax && keys %$tips; $i++ )
1231        {
1232           my $new = paths_to_tips( newick_desc_i( $node, $i ), $tips );
1233           foreach ( keys %$new )
1234           {
1235               splice @{ $new->{ $_ } }, 0, 0, ( $node, $i );
1236               $paths->{ $_ } = $new->{ $_ };
1237           }
1238        }
1239    
1240        return $paths;
1241    }
1242    
1243    
1244    #-------------------------------------------------------------------------------
1245    #  Path to named node.  Like path to tip, but also finds named internal nodes.
1246  #  #
1247  #  @path = path_to_named_node( $treenode, $name )  #  @path = path_to_named_node( $treenode, $name )
1248    #
1249  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1250  sub path_to_named_node {  sub path_to_named_node
1251      my ( $node, $name, @path0 ) = @_;  {
1252        my ( $node, $name ) = @_;
1253    
1254      push @path0, $node;      return ( $node ) if node_lbl_is( $node, $name );
     if ( newick_lbl( $node ) eq $name ) { return @path0 }  
1255    
     my @path;  
1256      my $imax = newick_n_desc( $node );      my $imax = newick_n_desc( $node );
1257      for ( my $i = 1; $i <= $imax; $i++ ) {      for ( my $i = 1; $i <= $imax; $i++ ) {
1258         @path = path_to_named_node( newick_desc_i( $node, $i ), $name, ( @path0, $i ) );         my @suf = path_to_named_node( newick_desc_i( $node, $i ), $name );
1259         if ( @path ) { return @path }         return ( $node, $i, @suf ) if @suf;
1260      }      }
1261    
1262      ();  #  Not found      ();  #  Not found
# Line 1061  Line 1264 
1264    
1265    
1266  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1267    #  Paths to named nodes in tree (need not be tips):
1268    #
1269    #  \%paths = paths_to_named_nodes( $treenode, \@names )
1270    #  \%paths = paths_to_named_nodes( $treenode, \%names )
1271    #
1272    #-------------------------------------------------------------------------------
1273    sub paths_to_named_nodes
1274    {
1275        my ( $node, $names ) = @_;
1276        return {} if ! ( $names && ref( $names ) );
1277    
1278        #  Replace request for list with request by hash
1279    
1280        if ( ref( $names ) eq 'ARRAY' ) { $names = { map { $_ => 1 } @$names } }
1281    
1282        my $paths = {};
1283        my $imax = newick_n_desc( $node );
1284    
1285        my $lbl;
1286        if ( node_has_lbl( $node ) && defined( $lbl = newick_lbl( $node ) ) && $names->{ $lbl } )
1287        {
1288            delete $names->{ $lbl };
1289            $paths->{ $lbl } = [ $node ];
1290        }
1291        return $paths if ! $imax;  # tip (no more to do it tested below)
1292    
1293        for ( my $i = 1; $i <= $imax && keys %$names; $i++ )
1294        {
1295           my $new = paths_to_named_nodes( newick_desc_i( $node, $i ), $names );
1296           foreach ( keys %$new )
1297           {
1298               splice @{ $new->{ $_ } }, 0, 0, ( $node, $i );
1299               $paths->{ $_ } = $new->{ $_ };
1300           }
1301        }
1302    
1303        return $paths;
1304    }
1305    
1306    
1307    #-------------------------------------------------------------------------------
1308  #  Path to node reference.  #  Path to node reference.
1309  #  #
1310  #  @path = path_to_node_ref( $treenode, $noderef )  #  @path = path_to_node_ref( $treenode, $noderef )
1311    #
1312  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1313  sub path_to_node_ref {  sub path_to_node_ref
1314      my ( $node, $noderef, @path0 ) = @_;  {
1315        my ( $node, $noderef ) = @_;
1316    
1317      push @path0, $node;      return ( $node ) if ( $node eq $noderef );
     if ( $node eq $noderef ) { return @path0 }  
1318    
     my @path;  
1319      my $imax = newick_n_desc( $node );      my $imax = newick_n_desc( $node );
1320      for ( my $i = 1; $i <= $imax; $i++ ) {      for ( my $i = 1; $i <= $imax; $i++ ) {
1321         @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );          my @suf = path_to_node_ref( newick_desc_i( $node, $i ), $noderef );
1322         if ( @path ) { return @path }          return ( $node, $i, @suf ) if @suf;
1323      }      }
1324    
1325      ();  #  Not found      ();  #  Not found
# Line 1083  Line 1327 
1327    
1328    
1329  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1330  #  Path to node, as defined by 1 or 3 tips.  #  Path to node, as defined by 1, 2 or 3 node names (usually tips).
1331  #  #
1332  #  @path = path_to_node( $node,   $tip1, $tip2, $tip3   )  #  3 tip names  #  @path = path_to_node( $tree,   $name1, $name2, $name3   )  #  3 tip names
1333  #  @path = path_to_node( $node, [ $tip1, $tip2, $tip3 ] )  #  Allow array ref  #  @path = path_to_node( $tree, [ $name1, $name2, $name3 ] )  #  Allow array ref
1334  #  @path = path_to_node( $node,   $tip1                 )  #  Use path_to_tip  #  @path = path_to_node( $tree,   $name1, $name2   )          #  2 tip names
1335  #  @path = path_to_node( $node, [ $tip1 ]               )  #  Use path_to_tip  #  @path = path_to_node( $tree, [ $name1, $name2 ] )          #  Allow array ref
1336  #-------------------------------------------------------------------------------  #  @path = path_to_node( $tree,   $name1   )                  #  Path to tip or named node
1337  sub path_to_node {  #  @path = path_to_node( $tree, [ $name1 ] )                  #  Allow array ref
1338      my ( $node, $tip1, $tip2, $tip3 ) = @_;  #
1339      array_ref( $node ) && defined( $tip1 ) || return ();  #-------------------------------------------------------------------------------
1340    sub path_to_node
1341    {
1342        my ( $tree, @names ) = @_;
1343        array_ref( $tree ) && defined( $names[0] ) || return ();
1344    
1345      # Allow arg 2 to be an array reference      # Allow arg 2 to be an array reference
     if ( array_ref( $tip1 ) ) { ( $tip1, $tip2, $tip3 ) = @$tip1 }  
1346    
1347      my @p1 = path_to_tip( $node, $tip1 );                #  Path to first tip      @names = @{ $names[0] }  if array_ref( $names[0] );
     @p1 || return ();                                    #  Was the tip found?  
     defined( $tip2 ) && defined( $tip3 ) || return @p1;  #  Two more defined?  
1348    
1349      my @p2 = path_to_tip( $node, $tip2 );      return () if @names < 1 || @names > 3;
     my @p3 = path_to_tip( $node, $tip3 );  
     @p2 && @p3 || return ();                             #  Were they found?  
1350    
1351      # Find the common prefix for each pair of paths      #  Just one name:
1352      my @p12 = gjolists::common_prefix( \@p1, \@p2 );  
1353      my @p13 = gjolists::common_prefix( \@p1, \@p3 );      return path_to_named_node( $tree, $names[0] ) if ( @names == 1 );
1354      my @p23 = gjolists::common_prefix( \@p2, \@p3 );  
1355        my @paths = values %{ paths_to_named_nodes( $tree, \@names ) };
1356    
1357        #  Were all node names found?
1358    
1359        return () if @paths != @names;
1360    
1361        my @path12 = &common_prefix( @paths[0,1] );
1362        return () if ! @path12;
1363        return @path12 if @paths == 2;
1364    
1365        my @path13 = &common_prefix( @paths[0,2] );
1366        my @path23 = &common_prefix( @paths[1,2] );
1367    
1368      # Return the longest common prefix of any two paths      # Return the longest common prefix of any two paths
1369      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :  
1370      ( @p13 >= @p23 )                 ? @p13 :      ( @path12 >= @path13 && @path12 >= @path23 ) ? @path12 :
1371                                         @p23 ;      ( @path13 >= @path23 )                       ? @path13 :
1372                                                       @path23 ;
1373  }  }
1374    
1375    
# Line 1121  Line 1377 
1377  #  Distance along path.  #  Distance along path.
1378  #  #
1379  #  $distance = newick_path_length( @path )  #  $distance = newick_path_length( @path )
1380    #
1381  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1382  sub newick_path_length {  sub newick_path_length
1383    {
1384      my $node = shift;      #  Discard the first node      my $node = shift;      #  Discard the first node
1385      array_ref( $node ) || return undef;      array_ref( $node ) || return undef;
1386      @_ ? distance_along_path_2( @_ ) : 0;      @_ ? distance_along_path_2( @_ ) : 0;
# Line 1133  Line 1391 
1391  #  This expects to get path minus root node:  #  This expects to get path minus root node:
1392  #  #
1393  #  $distance = distance_along_path_2( @path )  #  $distance = distance_along_path_2( @path )
1394    #
1395  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1396  sub distance_along_path_2 {  sub distance_along_path_2
1397    {
1398      shift;                 #  Discard descendant number      shift;                 #  Discard descendant number
1399      my $node = shift;      my $node = shift;
1400      array_ref( $node ) || return undef;      array_ref( $node ) || return undef;
# Line 1148  Line 1408 
1408  #  Tip-to-tip distance.  #  Tip-to-tip distance.
1409  #  #
1410  #  $distance = tip_to_tip_distance( $tree, $tip1, $tip2 )  #  $distance = tip_to_tip_distance( $tree, $tip1, $tip2 )
1411    #
1412  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1413  sub tip_to_tip_distance {  sub tip_to_tip_distance
1414    {
1415      my ( $node, $tip1, $tip2 ) = @_;      my ( $node, $tip1, $tip2 ) = @_;
1416    
1417      array_ref( $node ) && defined( $tip1 )      array_ref( $node ) && defined( $tip1 )
# Line 1159  Line 1421 
1421      @p1 && @p2 || return undef;                          # Were they found?      @p1 && @p2 || return undef;                          # Were they found?
1422    
1423      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1424      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1425      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1426      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1427    
# Line 1174  Line 1436 
1436  #                [ $tipname1, $tipname2, $tipname3 ]  #                [ $tipname1, $tipname2, $tipname3 ]
1437  #  #
1438  #  $distance = node_to_node_distance( $tree, $node1, $node2 )  #  $distance = node_to_node_distance( $tree, $node1, $node2 )
1439    #
1440  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1441  sub node_to_node_distance {  sub node_to_node_distance
1442    {
1443      my ( $node, $node1, $node2 ) = @_;      my ( $node, $node1, $node2 ) = @_;
1444    
1445      array_ref( $node ) && defined( $node1 )      array_ref( $node ) && defined( $node1 )
# Line 1184  Line 1448 
1448      my @p2 = path_to_node( $node, $node2 ) or return undef;      my @p2 = path_to_node( $node, $node2 ) or return undef;
1449    
1450      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1451      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1452      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1453      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1454    
# Line 1200  Line 1464 
1464  #  Only defined fields are added, so tree list may be shorter than 8 fields.  #  Only defined fields are added, so tree list may be shorter than 8 fields.
1465  #  #
1466  #  $treecopy = copy_newick_tree( $tree )  #  $treecopy = copy_newick_tree( $tree )
1467    #
1468  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1469  sub copy_newick_tree {  sub copy_newick_tree
1470    {
1471      my ( $node ) = @_;      my ( $node ) = @_;
1472      array_ref( $node ) || return undef;      array_ref( $node ) || return undef;
1473    
# Line 1234  Line 1500 
1500  #  Use a hash to relabel the nodes in a newick tree.  #  Use a hash to relabel the nodes in a newick tree.
1501  #  #
1502  #  $newtree = newick_relabel_nodes( $node, \%new_name )  #  $newtree = newick_relabel_nodes( $node, \%new_name )
1503    #
1504  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1505  sub newick_relabel_nodes {  sub newick_relabel_nodes
1506    {
1507      my ( $node, $new_name ) = @_;      my ( $node, $new_name ) = @_;
1508    
1509      my ( $lbl, $new );      my ( $new );
1510      if ( defined( $lbl = newick_lbl( $node ) )      if ( node_has_lbl( $node ) && defined( $new = $new_name->{ newick_lbl( $node ) } ) ) {
       && ( $lbl ne "" )  
       && defined( $new = $new_name->{ $lbl } )  
        ) {  
1511          set_newick_lbl( $node, $new );          set_newick_lbl( $node, $new );
1512      }      }
1513    
# Line 1258  Line 1523 
1523  #  Use a hash to relabel the nodes in a newick tree (case insensitive).  #  Use a hash to relabel the nodes in a newick tree (case insensitive).
1524  #  #
1525  #  $newtree = newick_relabel_nodes_i( $node, \%new_name )  #  $newtree = newick_relabel_nodes_i( $node, \%new_name )
1526    #
1527  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1528  sub newick_relabel_nodes_i {  sub newick_relabel_nodes_i
1529    {
1530      my ( $node, $new_name ) = @_;      my ( $node, $new_name ) = @_;
1531    
1532      #  Add any necessary lowercase keys to the hash:      #  Add any necessary lowercase keys to the hash:
# Line 1277  Line 1544 
1544  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1545  #  Do the actual relabeling  #  Do the actual relabeling
1546  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1547  sub newick_relabel_nodes_i2 {  sub newick_relabel_nodes_i2
1548    {
1549      my ( $node, $new_name ) = @_;      my ( $node, $new_name ) = @_;
1550    
1551      my ( $lbl, $new );      my ( $new );
1552      if ( defined( $lbl = newick_lbl( $node ) )      if ( node_has_lbl( $node ) && defined( $new = $new_name->{ lc newick_lbl( $node ) } ) ) {
       && ( $lbl ne "" )  
       && defined( $new = $new_name->{ lc $lbl } )  
        ) {  
1553          set_newick_lbl( $node, $new );          set_newick_lbl( $node, $new );
1554      }      }
1555    
# Line 1300  Line 1565 
1565  #  Use a hash to relabel the tips in a newick tree.  #  Use a hash to relabel the tips in a newick tree.
1566  #  #
1567  #  $newtree = newick_relabel_tips( $node, \%new_name )  #  $newtree = newick_relabel_tips( $node, \%new_name )
1568    #
1569  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1570  sub newick_relabel_tips {  sub newick_relabel_tips
1571    {
1572      my ( $node, $new_name ) = @_;      my ( $node, $new_name ) = @_;
1573    
1574      my @desc = newick_desc_list( $node );      my @desc = newick_desc_list( $node );
# Line 1310  Line 1577 
1577          foreach ( @desc ) { newick_relabel_tips( $_, $new_name ) }          foreach ( @desc ) { newick_relabel_tips( $_, $new_name ) }
1578      }      }
1579      else {      else {
1580          my ( $lbl, $new );          my ( $new );
1581          if ( defined( $lbl = newick_lbl( $node ) )          if ( node_has_lbl( $node ) && defined( $new = $new_name->{ newick_lbl( $node ) } ) ) {
           && ( $lbl ne "" )  
           && defined( $new = $new_name->{ $lbl } )  
            ) {  
1582              set_newick_lbl( $node, $new );              set_newick_lbl( $node, $new );
1583          }          }
1584      }      }
# Line 1327  Line 1591 
1591  #  Use a hash to relabel the tips in a newick tree (case insensitive).  #  Use a hash to relabel the tips in a newick tree (case insensitive).
1592  #  #
1593  #  $newtree = newick_relabel_tips_i( $node, \%new_name )  #  $newtree = newick_relabel_tips_i( $node, \%new_name )
1594    #
1595  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1596  sub newick_relabel_tips_i {  sub newick_relabel_tips_i
1597    {
1598      my ( $node, $new_name ) = @_;      my ( $node, $new_name ) = @_;
1599    
1600      #  Add any necessary lowercase keys to the hash:      #  Add any necessary lowercase keys to the hash:
# Line 1346  Line 1612 
1612  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1613  #  Do the actual relabeling  #  Do the actual relabeling
1614  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1615  sub newick_relabel_tips_i2 {  sub newick_relabel_tips_i2
1616    {
1617      my ( $node, $new_name ) = @_;      my ( $node, $new_name ) = @_;
1618    
1619      my @desc = newick_desc_list( $node );      my @desc = newick_desc_list( $node );
# Line 1355  Line 1622 
1622          foreach ( @desc ) { newick_relabel_tips_i2( $_, $new_name ) }          foreach ( @desc ) { newick_relabel_tips_i2( $_, $new_name ) }
1623      }      }
1624      else {      else {
1625          my ( $lbl, $new );          my ( $new );
1626          if ( defined( $lbl = newick_lbl( $node ) )          if ( node_has_lbl( $node ) && defined( $new = $new_name->{ lc newick_lbl( $node ) } ) ) {
           && ( $lbl ne "" )  
           && defined( $new = $new_name->{ lc $lbl } )  
            ) {  
1627              set_newick_lbl( $node, $new );              set_newick_lbl( $node, $new );
1628          }          }
1629      }      }
# Line 1372  Line 1636 
1636  #  Set undefined branch lenghts (except root) to length x.  #  Set undefined branch lenghts (except root) to length x.
1637  #  #
1638  #  $n_changed = newick_set_undefined_branches( $node, $x )  #  $n_changed = newick_set_undefined_branches( $node, $x )
1639    #
1640  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1641  sub newick_set_undefined_branches {  sub newick_set_undefined_branches
1642    {
1643      my ( $node, $x, $not_root ) = @_;      my ( $node, $x, $not_root ) = @_;
1644    
1645      my $n = 0;      my $n = 0;
# Line 1394  Line 1660 
1660  #  Set all branch lenghts (except root) to length x.  #  Set all branch lenghts (except root) to length x.
1661  #  #
1662  #  $n_changed = newick_set_all_branches( $node, $x )  #  $n_changed = newick_set_all_branches( $node, $x )
1663    #
1664  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1665  sub newick_set_all_branches {  sub newick_set_all_branches
1666    {
1667      my ( $node, $x, $not_root ) = @_;      my ( $node, $x, $not_root ) = @_;
1668    
1669      my $n = 0;      my $n = 0;
# Line 1418  Line 1686 
1686  #  Rescale all branch lenghts by factor.  #  Rescale all branch lenghts by factor.
1687  #  #
1688  #  $node = newick_rescale_branches( $node, $factor )  #  $node = newick_rescale_branches( $node, $factor )
1689    #
1690  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1691  sub newick_rescale_branches {  sub newick_rescale_branches
1692    {
1693      my ( $node, $factor ) = @_;      my ( $node, $factor ) = @_;
1694    
1695      my $x = newick_x( $node );      my $x = newick_x( $node );
# Line 1435  Line 1705 
1705    
1706    
1707  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1708    #  Set all branch lenghts (except root) to random number between x1 and x2.
1709    #
1710    #  $node = newick_random_branch_lengths( $node, $x1, $x2 )
1711    #
1712    #-------------------------------------------------------------------------------
1713    sub newick_random_branch_lengths
1714    {
1715        my ( $node, $x1, $x2 ) = @_;
1716        return undef if ! array_ref( $node );
1717        $x1 = 0        if ! defined( $x1 ) || $x1 < 0;
1718        $x2 = $x1 + 1  if ! defined( $x2 ) || $x2 < $x1;
1719        newick_random_branch_lengths_0( $node, $x1, $x2, 0 );
1720    }
1721    
1722    
1723    sub newick_random_branch_lengths_0
1724    {
1725        my ( $node, $x1, $x2, $not_root ) = @_;
1726    
1727        set_newick_x( $node, rand($x2-$x1) + $x1 ) if ( $not_root );
1728        foreach ( newick_desc_list( $node ) ) { newick_random_branch_lengths_0( $_, $x1, $x2, 1 ) }
1729    
1730        $node;
1731    }
1732    
1733    
1734    #-------------------------------------------------------------------------------
1735    #  Modify all branch lengths by a function.
1736    #
1737    #     $node = newick_modify_branches( $node, \&function )
1738    #     $node = newick_modify_branches( $node, \&function, \@func_parms )
1739    #
1740    #  Function must have form
1741    #
1742    #     $x2 = &$function( $x1 )
1743    #     $x2 = &$function( $x1, @$func_parms )
1744    #
1745    #-------------------------------------------------------------------------------
1746    sub newick_modify_branches
1747    {
1748        my ( $node, $func, $parm ) = @_;
1749    
1750        set_newick_x( $node, &$func( newick_x( $node ), ( $parm ? @$parm : () ) ) );
1751        foreach ( newick_desc_list( $node ) )
1752        {
1753            newick_modify_branches( $_, $func, $parm )
1754        }
1755    
1756        $node;
1757    }
1758    
1759    
1760    #-------------------------------------------------------------------------------
1761  #  Set negative branches to zero.  The original tree is modfied.  #  Set negative branches to zero.  The original tree is modfied.
1762  #  #
1763  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
1764    #
1765  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1766  sub newick_fix_negative_branches {  sub newick_fix_negative_branches
1767    {
1768      my ( $tree ) = @_;      my ( $tree ) = @_;
1769      array_ref( $tree ) or return undef;      array_ref( $tree ) or return undef;
1770      my $n_changed = 0;      my $n_changed = 0;
# Line 1463  Line 1788 
1788  #  Remove comments from a newick tree (e.g., before writing for phylip).  #  Remove comments from a newick tree (e.g., before writing for phylip).
1789  #  #
1790  #  $node = newick_strip_comments( $node )  #  $node = newick_strip_comments( $node )
1791    #
1792  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1793  sub newick_strip_comments {  sub newick_strip_comments
1794    {
1795      my ( $node ) = @_;      my ( $node ) = @_;
1796    
1797      @$node = @$node[ 0 .. 2 ];      @$node = @$node[ 0 .. 2 ];
# Line 1477  Line 1804 
1804  #  Normalize tree order (in place).  #  Normalize tree order (in place).
1805  #  #
1806  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )
1807    #
1808  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1809  sub normalize_newick_tree {  sub normalize_newick_tree
1810    {
1811      my ( $node ) = @_;      my ( $node ) = @_;
1812    
1813      my @descends = newick_desc_list( $node );      my @descends = newick_desc_list( $node );
# Line 1496  Line 1825 
1825  #  Reverse tree order (in place).  #  Reverse tree order (in place).
1826  #  #
1827  #  $tree = reverse_newick_tree( $tree )  #  $tree = reverse_newick_tree( $tree )
1828    #
1829  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1830  sub reverse_newick_tree {  sub reverse_newick_tree
1831    {
1832      my ( $node ) = @_;      my ( $node ) = @_;
1833    
1834      my @descends = newick_desc_list( $node );      my @descends = newick_desc_list( $node );
# Line 1513  Line 1844 
1844  #  Standard unrooted tree (in place).  #  Standard unrooted tree (in place).
1845  #  #
1846  #  $stdtree = std_unrooted_newick( $tree )  #  $stdtree = std_unrooted_newick( $tree )
1847    #
1848  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1849  sub std_unrooted_newick {  sub std_unrooted_newick
1850    {
1851      my ( $tree ) = @_;      my ( $tree ) = @_;
1852    
1853      my ( $mintip ) = sort { lc $a cmp lc $b } newick_tip_list( $tree );      my ( $mintip ) = sort { lc $a cmp lc $b } newick_tip_list( $tree );
# Line 1523  Line 1856 
1856    
1857    
1858  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1859  #  Move largest groups to periphery of tree (in place).  #  Standard name for a Newick tree topology
1860  #  #
1861  #      dir  <= -2 for up-sweeping tree (big groups always first),  #    $stdname = std_tree_name( $tree )
 #            = -1 for big group first, balanced tree,  
 #            =  0 for balanced tree,  
 #            =  1 for small group first, balanced tree, and  
 #           >=  2 for down-sweeping tree (small groups always top)  
1862  #  #
 #  $tree = aesthetic_newick_tree( $treeref, $dir )  
1863  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1864  sub aesthetic_newick_tree {  sub std_tree_name
1865      my ( $tree, $dir ) = @_;  {
1866      my %cnt;      my ( $tree ) = @_;
1867        my ( $mintip ) = sort { lc $a cmp lc $b } newick_tip_list( $tree );
1868        ( std_tree_name_2( reroot_newick_next_to_tip( copy_newick_tree( $tree ), $mintip ) ) )[0];
1869    }
1870    
1871      $dir = ! $dir       ?        0 :  #  Undefined or zero  
1872    #
1873    #  ( $name, $mintip ) = std_tree_name_2( $node )
1874    #
1875    sub std_tree_name_2
1876    {
1877        my ( $node ) = @_;
1878    
1879        my @descends = newick_desc_list( $node );
1880        if ( @descends == 0 )
1881        {
1882            my $lbl = newick_lbl( $node );
1883            return ( $lbl, $lbl );
1884        }
1885    
1886        my @list = sort { lc $a->[1] cmp lc $b->[1] || $a->[1] cmp $b->[1] }
1887                   map  { [ std_tree_name_2( $_ ) ] }
1888                   @descends;
1889        my $mintip = $list[0]->[1];
1890        my $name   = '(' . join( "\t", map { $_->[0] } @list ) . ')';
1891    
1892        return ( $name, $mintip );
1893    }
1894    
1895    
1896    #-------------------------------------------------------------------------------
1897    #  Move largest groups to periphery of tree (in place).
1898    #
1899    #  $tree = aesthetic_newick_tree( $treeref, $dir )
1900    #
1901    #      dir  <= -2 for up-sweeping tree (big groups always first),
1902    #            = -1 for big group first, balanced tree,
1903    #            =  0 for balanced tree,
1904    #            =  1 for small group first, balanced tree, and
1905    #           >=  2 for down-sweeping tree (small groups always top)
1906    #
1907    #-------------------------------------------------------------------------------
1908    sub aesthetic_newick_tree
1909    {
1910        my ( $tree, $dir ) = @_;
1911        my %cnt;
1912    
1913        $dir = ! $dir       ?        0 :  #  Undefined or zero
1914               $dir <= -2 ? -1000000 :               $dir <= -2 ? -1000000 :
1915               $dir <   0 ?       -1 :               $dir <   0 ?       -1 :
1916               $dir >=  2 ?  1000000 :               $dir >=  2 ?  1000000 :
# Line 1552  Line 1925 
1925  #  Access count with $cntref->{$noderef}  #  Access count with $cntref->{$noderef}
1926  #  #
1927  #  $count = build_tip_count_hash( $node, $cnt_hash_ref )  #  $count = build_tip_count_hash( $node, $cnt_hash_ref )
1928    #
1929  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1930  sub build_tip_count_hash {  sub build_tip_count_hash
1931    {
1932      my ( $node, $cntref ) = @_;      my ( $node, $cntref ) = @_;
1933      my ( $i, $cnt );      my ( $i, $cnt );
1934    
# Line 1576  Line 1951 
1951  #           = 0 for no change, and  #           = 0 for no change, and
1952  #           > 0 for downward branch (small group first).  #           > 0 for downward branch (small group first).
1953  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1954  sub reorder_by_tip_count {  sub reorder_by_tip_count
1955    {
1956      my ( $node, $cntref, $dir ) = @_;      my ( $node, $cntref, $dir ) = @_;
1957    
1958      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
1959      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip
1960    
     #  Reorder this subtree:  
   
1961      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1962      if    ( $dir < 0 ) {                   #  Big group first  
1963          @$dl_ref = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;      #  Reorder this subtree (biggest subtrees to outside)
1964    
1965        if ( $dir )
1966        {
1967            #  Big group first
1968            my @dl = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;
1969    
1970            my ( @dl1, @dl2 );
1971            for ( my $i = 0; $i < $nd; $i++ ) {
1972                if ( $i & 1 ) { push @dl2, $dl[$i] } else { push @dl1, $dl[$i] }
1973      }      }
1974      elsif ( $dir > 0 ) {                   #  Small group first  
1975          @$dl_ref = sort { $cntref->{$a} <=> $cntref->{$b} } @$dl_ref;          @$dl_ref = ( $dir < 0 ) ? ( @dl1, reverse @dl2 )
1976                                    : ( @dl2, reverse @dl1 );
1977      }      }
1978    
1979      #  Reorder within descendant subtrees:      #  Reorder within descendant subtrees:
# Line 1613  Line 1997 
1997  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1998  #  Move smallest groups to periphery of tree (in place).  #  Move smallest groups to periphery of tree (in place).
1999  #  #
2000    #  $tree = unaesthetic_newick_tree( $treeref, $dir )
2001    #
2002  #      dir  <= -2 for up-sweeping tree (big groups always first),  #      dir  <= -2 for up-sweeping tree (big groups always first),
2003  #            = -1 for big group first, balanced tree,  #            = -1 for big group first, balanced tree,
2004  #            =  0 for balanced tree,  #            =  0 for balanced tree,
2005  #            =  1 for small group first, balanced tree, and  #            =  1 for small group first, balanced tree, and
2006  #           >=  2 for down-sweeping tree (small groups always top)  #           >=  2 for down-sweeping tree (small groups always top)
2007  #  #
 #  $tree = unaesthetic_newick_tree( $treeref, $dir )  
2008  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2009  sub unaesthetic_newick_tree {  sub unaesthetic_newick_tree
2010    {
2011      my ( $tree, $dir ) = @_;      my ( $tree, $dir ) = @_;
2012      my %cnt;      my %cnt;
2013    
# Line 1641  Line 2027 
2027  #           = 0 for no change, and  #           = 0 for no change, and
2028  #           > 0 for downward branch (small group first).  #           > 0 for downward branch (small group first).
2029  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2030  sub reorder_against_tip_count {  sub reorder_against_tip_count
2031    {
2032      my ( $node, $cntref, $dir ) = @_;      my ( $node, $cntref, $dir ) = @_;
2033    
2034      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1679  Line 2066 
2066  #  Randomize descendant order at each node (in place).  #  Randomize descendant order at each node (in place).
2067  #  #
2068  #  $tree = random_order_newick_tree( $tree )  #  $tree = random_order_newick_tree( $tree )
2069    #
2070  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2071  sub random_order_newick_tree {  sub random_order_newick_tree
2072    {
2073      my ( $node ) = @_;      my ( $node ) = @_;
2074    
2075      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1689  Line 2078 
2078      #  Reorder this subtree:      #  Reorder this subtree:
2079    
2080      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
2081      @$dl_ref = gjolists::random_order( @$dl_ref );      @$dl_ref = &random_order( @$dl_ref );
2082    
2083      #  Reorder descendants:      #  Reorder descendants:
2084    
# Line 1700  Line 2089 
2089    
2090    
2091  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2092    #  Reroot a tree using method specified by options.
2093    #
2094    #     $newtree = reroot_tree( $tree, \%options )
2095    #
2096    #  Options
2097    #
2098    #     adjacent_to_tip =>  $tip         # root next to named tip (no nodes)
2099    #     adjacent_to_tip =>  $bool        # root next to tip defined by nodes
2100    #     distance        =>  $distance    # distance on path from node1 to node2
2101    #     fraction        =>  $fraction    # fraction of path from node1 to node2
2102    #     midpoint        =>  $bool        # midpoint root tree (no nodes)
2103    #     node            =>  $node_spec   # just one node spec
2104    #     nodes           => \@node_specs  # 0, 1 or 2 node specifiers
2105    #     tip             =>  $tip         # short way to get tip root
2106    #
2107    #  node_spec can be 1, 2 or 3 node labels:
2108    #
2109    #     With 1 label, it is the node with that name (tip or internal)
2110    #     With 2 labels, it is the most recent common ancestor of the 2 named nodes
2111    #     With 3 labels, it is the intersection point of the paths to the 3 nodes
2112    #
2113    #-------------------------------------------------------------------------------
2114    sub reroot_tree
2115    {
2116        my ( $tree, $opts ) = @_;
2117        return undef if ! array_ref( $tree );
2118        $opts ||= {};
2119    
2120        return reroot_newick_to_midpoint_w( $tree ) if $opts->{ midpoint };
2121    
2122        #  All other options require 1 or 2 node specifiers
2123    
2124        my @nodes = array_ref( $opts->{ nodes } ) ? @{ $opts->{ nodes } } : ();
2125        push @nodes, $opts->{ node } if array_ref( $opts->{ node } );
2126    
2127        foreach ( @nodes )
2128        {
2129            next if ( array_ref( $_ ) && ( @$_ > 0 ) && ( @$_ <= 3 ) );
2130            print STDERR "Bad node specifier passed to gjonewicklib::reroot_tree().\n";
2131            return $tree;
2132        }
2133    
2134        my $adj_to_tip = $opts->{ adjacent_to_tip };
2135        my $distance   = $opts->{ distance };
2136        my $fraction   = $opts->{ fraction };
2137        my $tip        = $opts->{ tip };
2138    
2139        if ( defined( $distance ) && @nodes == 2 )
2140        {
2141            $distance = 0 if $distance < 0;
2142            $tree = reroot_newick_at_dist_between_nodes( $tree, @nodes, $distance )
2143        }
2144        elsif ( @nodes == 2 )
2145        {
2146            $fraction = 0.5 if ! defined( $fraction );
2147            $fraction = 0   if $fraction < 0;
2148            $fraction = 1   if $fraction > 1;
2149            $tree = reroot_newick_between_nodes( $tree, @nodes, $fraction )
2150        }
2151        elsif ( $adj_to_tip )
2152        {
2153            $adj_to_tip = $nodes[0]->[0] if @nodes == 1 && @{$nodes[0]} == 1;
2154            $tree = reroot_newick_next_to_tip( $tree, $adj_to_tip );
2155        }
2156        elsif ( @nodes == 1 )
2157        {
2158            #  Root at node:
2159            $tree = reroot_newick_to_node( $tree, $nodes[0] );
2160        }
2161        elsif ( defined( $tip ) && $tip ne '' )
2162        {
2163            #  Root at tip:
2164            $tree = reroot_newick_to_tip( $tree, $tip );
2165        }
2166    
2167        return $tree;
2168    }
2169    
2170    
2171    #-------------------------------------------------------------------------------
2172  #  Reroot a tree to the node that lies at the end of a path.  #  Reroot a tree to the node that lies at the end of a path.
2173  #  #
2174  #  $newtree = reroot_newick_by_path( @path )  #  $newtree = reroot_newick_by_path( @path )
2175    #
2176  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2177  sub reroot_newick_by_path {  sub reroot_newick_by_path
2178    {
2179      my ( $node1, $path1, @rest ) = @_;      my ( $node1, $path1, @rest ) = @_;
2180      array_ref( $node1 ) || return undef;      #  Always expect a node      array_ref( $node1 ) || return undef;      #  Always expect a node
2181    
# Line 1769  Line 2240 
2240  #  Move root of tree to named tip.  #  Move root of tree to named tip.
2241  #  #
2242  #  $newtree = reroot_newick_to_tip( $tree, $tip )  #  $newtree = reroot_newick_to_tip( $tree, $tip )
2243    #
2244  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2245  sub reroot_newick_to_tip {  sub reroot_newick_to_tip
2246    {
2247      my ( $tree, $tipname ) = @_;      my ( $tree, $tipname ) = @_;
2248      reroot_newick_by_path( path_to_tip( $tree, $tipname ) );      reroot_newick_by_path( path_to_tip( $tree, $tipname ) );
2249  }  }
# Line 1780  Line 2253 
2253  #  Move root of tree to be node adjacent to a named tip.  #  Move root of tree to be node adjacent to a named tip.
2254  #  #
2255  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )
2256    #
2257  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2258  sub reroot_newick_next_to_tip {  sub reroot_newick_next_to_tip
2259    {
2260      my ( $tree, $tipname ) = @_;      my ( $tree, $tipname ) = @_;
2261      my @path = path_to_tip( $tree, $tipname );      my @path = path_to_tip( $tree, $tipname );
2262      @path || return undef;      @path || return undef;
# Line 1794  Line 2269 
2269  #  Move root of tree to a node, defined by 1 or 3 tip names.  #  Move root of tree to a node, defined by 1 or 3 tip names.
2270  #  #
2271  #  $newtree = reroot_newick_to_node( $tree, @node )  #  $newtree = reroot_newick_to_node( $tree, @node )
2272    #
2273  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2274  sub reroot_newick_to_node {  sub reroot_newick_to_node
2275    {
2276      reroot_newick_by_path( path_to_node( @_ ) );      reroot_newick_by_path( path_to_node( @_ ) );
2277  }  }
2278    
# Line 1804  Line 2281 
2281  #  Move root of tree to a node, defined by reference.  #  Move root of tree to a node, defined by reference.
2282  #  #
2283  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
2284    #
2285  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2286  sub reroot_newick_to_node_ref {  sub reroot_newick_to_node_ref
2287    {
2288      my ( $tree, $node ) = @_;      my ( $tree, $node ) = @_;
2289      reroot_newick_by_path( path_to_node_ref( $tree, $node ) );      reroot_newick_by_path( path_to_node_ref( $tree, $node ) );
2290  }  }
2291    
2292    
2293  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2294    #  Reroot a newick tree along the path between 2 nodes:
2295    #
2296    #  $tree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
2297    #
2298    #-------------------------------------------------------------------------------
2299    sub reroot_newick_between_nodes
2300    {
2301        my ( $tree, $node1, $node2, $fraction ) = @_;
2302        array_ref( $tree ) or return undef;
2303    
2304        #  Find the paths to the nodes:
2305    
2306        my @path1 = path_to_node( $tree, $node1 ) or return $tree;
2307        my @path2 = path_to_node( $tree, $node2 ) or return $tree;
2308    
2309        reroot_newick_between_nodes_by_path( \@path1, \@path2, $fraction )
2310    }
2311    
2312    
2313    #-------------------------------------------------------------------------------
2314    #  Reroot a newick tree along the path between 2 nodes:
2315    #
2316    #  $tree = reroot_newick_between_node_refs( $tree, $node1, $node2, $fraction )
2317    #
2318    #-------------------------------------------------------------------------------
2319    sub reroot_newick_between_node_refs
2320    {
2321        my ( $tree, $node1, $node2, $fraction ) = @_;
2322        array_ref( $tree ) or return undef;
2323    
2324        #  Find the paths to the nodes:
2325    
2326        my @path1 = path_to_node_ref( $tree, $node1 ) or return $tree;
2327        my @path2 = path_to_node_ref( $tree, $node2 ) or return $tree;
2328    
2329        reroot_newick_between_nodes_by_path( \@path1, \@path2, $fraction )
2330    }
2331    
2332    
2333    #-------------------------------------------------------------------------------
2334    #  Reroot a newick tree along the path between 2 nodes defined by paths:
2335    #
2336    #  $tree = reroot_newick_between_nodes_by_path( $path1, $path2, $fraction )
2337    #
2338    #-------------------------------------------------------------------------------
2339    sub reroot_newick_between_nodes_by_path
2340    {
2341        my ( $path1, $path2, $fraction ) = @_;
2342        array_ref( $path1 ) && array_ref( $path2 ) or return undef;
2343    
2344        $fraction = 0 if ( ! defined( $fraction ) ) || ( $fraction < 0 );
2345        $fraction = 1 if ( $fraction > 1 );
2346    
2347        my $prefix;
2348        ( $prefix, $path1, $path2 ) = common_and_unique_paths( $path1, $path2 );
2349    
2350        my $dist1 = ( @$path1 >= 3 ) ? newick_path_length( @$path1 ) : 0;
2351        my $dist2 = ( @$path2 >= 3 ) ? newick_path_length( @$path2 ) : 0;
2352    
2353        #  Case where there is no length (possibly same node):
2354    
2355        return reroot_newick_by_path( @$prefix, $path1->[0] ) if $dist1 + $dist2 <= 0;
2356    
2357        my $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2358        my $path = ( $dist <= 0 ) ? $path1 : $path2;
2359        $dist = abs( $dist );
2360    
2361        #  Descend tree until we reach the insertion branch:
2362    
2363        reroot_newick_at_dist_along_path( $prefix, $path, $dist );
2364    }
2365    
2366    
2367    #-------------------------------------------------------------------------------
2368    #  Reroot a newick tree along the path between 2 nodes:
2369    #
2370    #  $tree = reroot_newick_at_dist_between_nodes( $tree, $node1, $node2, $distance )
2371    #
2372    #-------------------------------------------------------------------------------
2373    sub reroot_newick_at_dist_between_nodes
2374    {
2375        my ( $tree, $node1, $node2, $distance ) = @_;
2376        array_ref( $tree ) or return undef;
2377    
2378        #  Find the paths to the nodes:
2379    
2380        my @path1 = path_to_node( $tree, $node1 ) or return $tree;
2381        my @path2 = path_to_node( $tree, $node2 ) or return $tree;
2382    
2383        reroot_newick_at_dist_between_nodes_by_path( \@path1, \@path2, $distance );
2384    }
2385    
2386    
2387    #-------------------------------------------------------------------------------
2388    #  Reroot a newick tree along the path between 2 nodes identified by ref:
2389    #
2390    #  $tree = reroot_newick_at_dist_between_node_refs( $tree, $node1, $node2, $distance )
2391    #
2392    #-------------------------------------------------------------------------------
2393    sub reroot_newick_at_dist_between_node_refs
2394    {
2395        my ( $tree, $node1, $node2, $distance ) = @_;
2396        array_ref( $tree ) or return undef;
2397    
2398        #  Find the paths to the nodes:
2399    
2400        my @path1 = path_to_node_ref( $tree, $node1 ) or return $tree;
2401        my @path2 = path_to_node_ref( $tree, $node2 ) or return $tree;
2402    
2403        reroot_newick_at_dist_between_nodes_by_path( \@path1, \@path2, $distance );
2404    }
2405    
2406    
2407    #-------------------------------------------------------------------------------
2408    #  Reroot a newick tree along the path between 2 nodes defined by paths:
2409    #
2410    #  $tree = reroot_newick_at_dist_between_nodes_by_path( $path1, $path2, $distance )
2411    #
2412    #-------------------------------------------------------------------------------
2413    sub reroot_newick_at_dist_between_nodes_by_path
2414    {
2415        my ( $path1, $path2, $distance ) = @_;
2416        array_ref( $path1 ) && array_ref( $path2 ) or return undef;
2417        $distance = 0 if ( ! defined( $distance ) ) || ( $distance < 0 );
2418    
2419        my $prefix;
2420        ( $prefix, $path1, $path2 ) = common_and_unique_paths( $path1, $path2 );
2421    
2422        my $dist1 = ( @$path1 >= 3 ) ? newick_path_length( @$path1 ) : 0;
2423        my $dist2 = ( @$path2 >= 3 ) ? newick_path_length( @$path2 ) : 0;
2424    
2425        #  Case where there is no length (possibly same node):
2426    
2427        return reroot_newick_by_path( @$prefix, $path1->[0] ) if $dist1 + $dist2 <= 0;
2428    
2429        my ( $path, $dist );
2430        if ( $distance < $dist1 )
2431        {
2432            $path = $path1;
2433            $dist = $dist1 - $distance;
2434        }
2435        else
2436        {
2437            $path = $path2;
2438            $dist = $distance - $dist1;
2439        }
2440    
2441        #  Descend tree until we reach the insertion branch:
2442    
2443        reroot_newick_at_dist_along_path( $prefix, $path, $dist );
2444    }
2445    
2446    
2447    #-------------------------------------------------------------------------------
2448    #  Reroot a newick tree along the path between 2 nodes defined by paths:
2449    #
2450    #  ( \@common, \@unique1, \@unique2 ) = common_and_unique_paths( \@path1, \@path2 )
2451    #
2452    #-------------------------------------------------------------------------------
2453    sub common_and_unique_paths
2454    {
2455        my ( $path1, $path2 ) = @_;
2456    
2457        my @path1 = @$path1;
2458        my @path2 = @$path2;
2459    
2460        #  Trim the common prefix, saving it:
2461    
2462        my $i = 1;
2463        my $imax = min( scalar @path1, scalar @path2 );
2464        while ( ( $i < $imax ) && ( $path1[$i] == $path2[$i] ) ) { $i += 2 }
2465    
2466        my @prefix = ();
2467        if ( $i > 1 ) { @prefix = splice( @path1, 0, $i-1 ); splice( @path2, 0, $i-1 ) }
2468    
2469        ( \@prefix, \@path1, \@path2 );
2470    }
2471    
2472    
2473    #-------------------------------------------------------------------------------
2474    #  Reroot a newick tree at a distance from the most ancestral node along a path:
2475    #
2476    #  $tree = reroot_newick_at_dist_along_path( \@prefix, \@path, $distance )
2477    #
2478    #     -   n1              n1
2479    #     |  /  \            /  \
2480    #     |      \ x2            \ x2
2481    #     |       \               \
2482    #     | dist   n2              n2
2483    #     |       /  \            /  \ x23 = dist - x2
2484    #     |           \               \
2485    #     -----------  \ x3  --------  n23
2486    #                   \             /  \ x3' = x3 - x23
2487    #                    n3               n3
2488    #                   /  \             /  \
2489    #
2490    #-------------------------------------------------------------------------------
2491    sub reroot_newick_at_dist_along_path
2492    {
2493        my ( $prefix, $path, $dist ) = @_;
2494        array_ref( $prefix ) or return undef;
2495        array_ref( $path )   or return $prefix->[0];
2496        defined( $dist )     or $dist = 0;
2497    
2498        my @prefix = @$prefix;
2499        my @path   = @$path;
2500    
2501        #  Descend tree until we reach the insertion branch:
2502    
2503        my $x = ( @path > 2 ) ? newick_x( $path[2] ) : 0;
2504        while ( ( @path > 4 ) && ( $dist > $x ) )
2505        {
2506            $dist -= $x;
2507            push @prefix, splice( @path, 0, 2 );
2508            $x = newick_x( $path[2] );
2509        }
2510        $dist = $x if ( $dist > $x );
2511    
2512        #  Insert the new node:
2513    
2514        my $newnode = [ [ $path[2] ], undef, $dist ];
2515        set_newick_desc_i( $path[0], $path[1], $newnode );
2516        set_newick_x( $path[2], $x - $dist );
2517    
2518        #  We can now build the path from root to the new node
2519    
2520        reroot_newick_by_path( @prefix, @path[0,1], $newnode );
2521    }
2522    
2523    
2524    #-------------------------------------------------------------------------------
2525  #  Move root of tree to an approximate midpoint.  #  Move root of tree to an approximate midpoint.
2526  #  #
2527  #  $newtree = reroot_newick_to_approx_midpoint( $tree )  #  $newtree = reroot_newick_to_approx_midpoint( $tree )
2528    #
2529  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2530  sub reroot_newick_to_approx_midpoint {  sub reroot_newick_to_approx_midpoint
2531    {
2532      my ( $tree ) = @_;      my ( $tree ) = @_;
2533    
2534      #  Compile average tip to node distances assending      #  Compile average tip to node distances assending
2535    
2536      my $dists1 = average_to_tips_1( $tree );      my $dists1 = average_to_tips_1( $tree );
2537    
2538      #  Compile average tip to node distances descending, returning midpoint node      #  Compile average tip to node distances descending, returning midpoint
2539        #  cadidates as a list of [ $node1, $node2, $fraction ]
2540    
2541        my @mids = average_to_tips_2( $dists1, undef, undef );
2542    
2543        #  Reroot to first midpoint candidate
2544    
2545      my $node = average_to_tips_2( $dists1, undef, undef );      return $tree if ! @mids;
2546        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2547        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2548    }
2549    
2550    
2551    #-------------------------------------------------------------------------------
2552    #  Move root of tree to a midpoint.
2553    #
2554    #  $newtree = reroot_newick_to_midpoint( $tree )
2555    #
2556    #-------------------------------------------------------------------------------
2557    sub reroot_newick_to_midpoint
2558    {
2559        my ( $tree ) = @_;
2560    
2561        #  Compile average tip to node distances assending
2562    
2563        my $dists1 = average_to_tips_1( $tree );
2564    
2565        #  Compile average tip to node distances descending, returning midpoint
2566        #  [ $node1, $node2, $fraction ]
2567    
2568      #  Reroot      my @mids = average_to_tips_2( $dists1, undef, undef );
2569    
2570      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree      @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2571  }  }
2572    
2573    
2574  sub average_to_tips_1 {  #-------------------------------------------------------------------------------
2575    #  Compile average tip to node distances assending
2576    #-------------------------------------------------------------------------------
2577    sub average_to_tips_1
2578    {
2579      my ( $node ) = @_;      my ( $node ) = @_;
2580    
2581      my @desc_dists = map { average_to_tips_1( $_ ) } newick_desc_list( $node );      my @desc_dists = map { average_to_tips_1( $_ ) } newick_desc_list( $node );
# Line 1843  Line 2585 
2585          foreach ( @desc_dists ) { $x_below += $_->[0] }          foreach ( @desc_dists ) { $x_below += $_->[0] }
2586          $x_below /= @desc_dists;          $x_below /= @desc_dists;
2587      }      }
2588    
2589      my $x = newick_x( $node ) || 0;      my $x = newick_x( $node ) || 0;
2590      my $x_net = $x_below + $x;      my $x_net = $x_below + $x;
2591    
# Line 1850  Line 2593 
2593  }  }
2594    
2595    
2596  sub average_to_tips_2 {  #-------------------------------------------------------------------------------
2597    #  Compile average tip to node distances descending, returning midpoint as
2598    #  [ $node1, $node2, $fraction_of_dist_between ]
2599    #-------------------------------------------------------------------------------
2600    sub average_to_tips_2
2601    {
2602      my ( $dists1, $x_above, $anc_node ) = @_;      my ( $dists1, $x_above, $anc_node ) = @_;
2603      my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;      my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;
2604    
2605      #  Are we done?  Root is in this node's branch, or "above"?      #  Are we done?  Root is in this node's branch, or "above"?
2606    
2607      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      my @mids = ();
     # print STDERR "x       = $x\n";  
     # print STDERR "x_below = $x_below\n";  
     # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";  
   
2608      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2609      {      {
2610          #  At this point the root can only be in this node's branch,          #  At this point the root can only be in this node's branch,
# Line 1872  Line 2616 
2616    
2617          if ( ( $x_below + $x ) >= $x_above )          if ( ( $x_below + $x ) >= $x_above )
2618          {          {
2619              return ( $x_above >= $x_below ) ? $anc_node : $node;              #  We will need to make a new node for the root, $fract of
2620          }              #  the way from $node to $anc_node:
2621          else              my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2622          {                                     : 0.5;
2623              return undef;              push @mids, [ $node, $anc_node, $fract ];
2624          }          }
2625      }      }
2626    
2627      #  The root must be somewhere below this node:      #  The root might be somewhere below this node:
2628    
2629      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );
2630      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );
# Line 1890  Line 2634 
2634          #  If input tree is tip_rooted, $n-1 can be 0, so:          #  If input tree is tip_rooted, $n-1 can be 0, so:
2635    
2636          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;
2637          my $root = average_to_tips_2( $_, $above2, $node );          push @mids, average_to_tips_2( $_, $above2, $node );
         if ( $root ) { return $root }  
2638      }      }
2639    
2640      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2641  }  }
2642    
2643    
# Line 1904  Line 2645 
2645  #  Move root of tree to an approximate midpoint.  Weight by tips.  #  Move root of tree to an approximate midpoint.  Weight by tips.
2646  #  #
2647  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )
2648    #
2649  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2650  sub reroot_newick_to_approx_midpoint_w {  sub reroot_newick_to_approx_midpoint_w
2651    {
2652      my ( $tree ) = @_;      my ( $tree ) = @_;
2653        array_ref( $tree ) or return undef;
2654    
2655        #  Compile average tip to node distances assending from tips
2656    
2657        my $dists1 = average_to_tips_1_w( $tree );
2658    
2659        #  Compile average tip to node distances descending, returning midpoints
2660    
2661        my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2662    
2663        #  Reroot to first midpoint candidate
2664    
2665        return $tree if ! @mids;
2666        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2667        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2668    }
2669    
2670    
2671    #-------------------------------------------------------------------------------
2672    #  Move root of tree to an approximate midpoint.  Weight by tips.
2673    #
2674    #  $newtree = reroot_newick_to_midpoint_w( $tree )
2675    #
2676    #-------------------------------------------------------------------------------
2677    sub reroot_newick_to_midpoint_w
2678    {
2679        my ( $tree ) = @_;
2680        array_ref( $tree ) or return ();
2681    
2682      #  Compile average tip to node distances assending      #  Compile average tip to node distances assending
2683    
# Line 1914  Line 2685 
2685    
2686      #  Compile average tip to node distances descending, returning midpoint node      #  Compile average tip to node distances descending, returning midpoint node
2687    
2688      my $node = average_to_tips_2_w( $dists1, undef, undef, undef );      my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2689    
2690      #  Reroot      #  Reroot at first candidate midpoint
2691    
2692      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree      @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2693  }  }
2694    
2695    
2696  sub average_to_tips_1_w {  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2697    sub average_to_tips_1_w
2698    {
2699      my ( $node ) = @_;      my ( $node ) = @_;
2700    
2701      my @desc_dists = map { average_to_tips_1_w( $_ ) } newick_desc_list( $node );      my @desc_dists = map { average_to_tips_1_w( $_ ) } newick_desc_list( $node );
# Line 1939  Line 2712 
2712          }          }
2713          $x_below /= $n_below;          $x_below /= $n_below;
2714      }      }
2715    
2716      my $x = newick_x( $node ) || 0;      my $x = newick_x( $node ) || 0;
2717      my $x_net = $x_below + $x;      my $x_net = $x_below + $x;
2718    
# Line 1946  Line 2720 
2720  }  }
2721    
2722    
2723  sub average_to_tips_2_w {  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2724    sub average_to_tips_2_w
2725    {
2726      my ( $dists1, $x_above, $n_above, $anc_node ) = @_;      my ( $dists1, $x_above, $n_above, $anc_node ) = @_;
2727      my ( undef, $n_below, $x, $x_below, $desc_list, $node ) = @$dists1;      my ( undef, $n_below, $x, $x_below, $desc_list, $node ) = @$dists1;
2728    
2729      #  Are we done?  Root is in this node's branch, or "above"?      #  Are we done?  Root is in this node's branch, or "above"?
2730    
2731      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      my @mids = ();
     # print STDERR "x       = $x\n";  
     # print STDERR "x_below = $x_below\n";  
     # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";  
   
2732      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2733      {      {
2734          #  At this point the root can only be in this node's branch,          #  At this point the root can only be in this node's branch,
# Line 1964  Line 2736 
2736          #  would mean that the midpoint is actually down a different          #  would mean that the midpoint is actually down a different
2737          #  path from the root of the current tree).          #  path from the root of the current tree).
2738          #          #
2739          #  Is the root in the current branch?          #  Is their a root in the current branch?
2740    
2741          if ( ( $x_below + $x ) >= $x_above )          if ( ( $x_below + $x ) >= $x_above )
2742          {          {
2743              return ( $x_above >= $x_below ) ? $anc_node : $node;              #  We will need to make a new node for the root, $fract of
2744          }              #  the way from $node to $anc_node:
2745          else              my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2746          {                                     : 0.5;
2747              return undef;              push @mids, [ $node, $anc_node, $fract ];
2748          }          }
2749      }      }
2750    
# Line 1992  Line 2764 
2764    
2765          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )
2766                                   : 0;                                   : 0;
2767          my $root = average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );          push @mids, average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );
         if ( $root ) { return $root }  
2768      }      }
2769    
2770      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2771  }  }
2772    
2773    
# Line 2006  Line 2775 
2775  #  Move root of tree from tip to adjacent node.  #  Move root of tree from tip to adjacent node.
2776  #  #
2777  #  $newtree = uproot_tip_rooted_newick( $tree )  #  $newtree = uproot_tip_rooted_newick( $tree )
2778    #
2779  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2780  sub uproot_tip_rooted_newick {  sub uproot_tip_rooted_newick
2781    {
2782      my ( $node ) = @_;      my ( $node ) = @_;
2783      newick_is_tip_rooted( $node ) || return $node;      newick_is_tip_rooted( $node ) || return $node;
2784    
# Line 2023  Line 2794 
2794  #  Root node label, label comment and descendant list comment are discarded.  #  Root node label, label comment and descendant list comment are discarded.
2795  #  #
2796  #  $newtree = uproot_newick( $tree )  #  $newtree = uproot_newick( $tree )
2797    #
2798  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2799  sub uproot_newick {  sub uproot_newick
2800    {
2801      my ( $node0 ) = @_;      my ( $node0 ) = @_;
2802      newick_is_rooted( $node0 ) || return $node0;      newick_is_rooted( $node0 ) || return $node0;
2803    
# Line 2071  Line 2844 
2844  #  Prefix branch of node2 to that of node1:  #  Prefix branch of node2 to that of node1:
2845  #  #
2846  #  $node1 = add_to_newick_branch( $node1, $node2 )  #  $node1 = add_to_newick_branch( $node1, $node2 )
2847    #
2848  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2849  sub add_to_newick_branch {  sub add_to_newick_branch
2850    {
2851      my ( $node1, $node2 ) = @_;      my ( $node1, $node2 ) = @_;
2852      array_ref( $node1 ) || die "add_to_newick_branch: arg 1 not array ref\n";      array_ref( $node1 ) || die "add_to_newick_branch: arg 1 not array ref\n";
2853      array_ref( $node2 ) || die "add_to_newick_branch: arg 2 not array ref\n";      array_ref( $node2 ) || die "add_to_newick_branch: arg 2 not array ref\n";
# Line 2110  Line 2885 
2885  #  #
2886  #  $tree = collapse_zero_length_branches( $tree )  #  $tree = collapse_zero_length_branches( $tree )
2887  #  $tree = collapse_zero_length_branches( $tree, $not_root )  #  $tree = collapse_zero_length_branches( $tree, $not_root )
2888    #
2889  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2890  sub collapse_zero_length_branches {  sub collapse_zero_length_branches
2891    {
2892      my ( $tree, $not_root ) = @_;      my ( $tree, $not_root ) = @_;
2893      array_ref( $tree ) || return undef;      array_ref( $tree ) || return undef;
2894    
# Line 2148  Line 2925 
2925  #  Add a subtree to a newick tree node:  #  Add a subtree to a newick tree node:
2926  #  #
2927  #  $node = newick_insert_at_node( $node, $subtree )  #  $node = newick_insert_at_node( $node, $subtree )
2928    #
2929  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2930  sub newick_insert_at_node  sub newick_insert_at_node
2931  {  {
# Line 2173  Line 2951 
2951  #  Insert a subtree into a newick tree along the path between 2 nodes:  #  Insert a subtree into a newick tree along the path between 2 nodes:
2952  #  #
2953  #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )  #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
2954    #
2955  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2956  sub newick_insert_between_nodes  sub newick_insert_between_nodes
2957  {  {
# Line 2241  Line 3020 
3020  #  $newtree = prune_from_newick( $tree,  $tip  )  #  $newtree = prune_from_newick( $tree,  $tip  )
3021  #  $newtree = prune_from_newick( $tree,  @tips )  #  $newtree = prune_from_newick( $tree,  @tips )
3022  #  $newtree = prune_from_newick( $tree, \@tips )  #  $newtree = prune_from_newick( $tree, \@tips )
3023    #
3024  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
3025  sub prune_from_newick {  sub prune_from_newick
3026    {
3027      my ( $tr, @tips ) = @_;      my ( $tr, @tips ) = @_;
3028      if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }      if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }
3029    
# Line 2259  Line 3040 
3040  #  Prune a tip from a tree:  #  Prune a tip from a tree:
3041  #  #
3042  #  $newtree = prune_1_from_newick( $tree, $tip )  #  $newtree = prune_1_from_newick( $tree, $tip )
3043    #
3044  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
3045  sub prune_1_from_newick {  sub prune_1_from_newick
3046    {
3047      my ( $tr, $tip ) = @_;      my ( $tr, $tip ) = @_;
3048      my @path = path_to_tip( $tr, $tip );      my @path = path_to_tip( $tr, $tip );
3049      if ( @path < 3 ) { return $tr }      if ( @path < 3 ) { return $tr }
# Line 2299  Line 3082 
3082          $tr = $sis;                              #    sister is new root          $tr = $sis;                              #    sister is new root
3083      }      }
3084    
3085      elsif ( $nd1 == 3 ) {                        # Tip joins trifurcating root:      elsif ( $nd1 == 3 ) {                        # Tip joins trifurcating root:
3086          splice( @{ $anc1->[0] }, $i1-1, 1 );     #    delete the descendant, and          splice( @{ $anc1->[0] }, $i1-1, 1 );     #    delete the descendant, and
3087          $tr = uproot_newick( $tr );              #    fix the rooting          $tr = uproot_newick( $tr );              #    fix the rooting
3088        }
3089    
3090        else {
3091            return undef;
3092        }
3093    
3094        return $tr;
3095    }
3096    
3097    
3098    #-------------------------------------------------------------------------------
3099    #  Produce a potentially rooted subtree with the desired tips:
3100    #
3101    #     Except for (some) tip nodes, the tree produced is a copy.
3102    #     There is no check that requested tips exist.
3103    #
3104    #  $newtree = rooted_newick_subtree( $tree,  @tips )
3105    #  $newtree = rooted_newick_subtree( $tree, \@tips )
3106    #
3107    #-------------------------------------------------------------------------------
3108    sub rooted_newick_subtree
3109    {
3110        my ( $tr, @tips ) = @_;
3111        if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }
3112    
3113        if ( @tips < 2 ) { return undef }
3114        my $keephash = { map { ( $_, 1 ) } @tips };
3115        my $tr2 = subtree1( $tr, $keephash );
3116        $tr2->[2] = undef if $tr2;                   # undef root branch length
3117        $tr2;
3118    }
3119    
3120    
3121    #-------------------------------------------------------------------------------
3122    #  Produce a subtree with the desired tips:
3123    #
3124    #     Except for (some) tip nodes, the tree produced is a copy.
3125    #     There is no check that requested tips exist.
3126    #
3127    #  $newtree = newick_subtree( $tree,  @tips )
3128    #  $newtree = newick_subtree( $tree, \@tips )
3129    #
3130    #-------------------------------------------------------------------------------
3131    sub newick_subtree
3132    {
3133        my ( $tr, @tips ) = @_;
3134        if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }
3135    
3136        if ( @tips < 2 ) { return undef }
3137        my $was_rooted = newick_is_rooted( $tr );
3138        my $keephash = { map { ( $_, 1 ) } @tips };
3139        my $tr2 = subtree1( $tr, $keephash );
3140        $tr2 = uproot_newick( $tr2 ) if ! $was_rooted && newick_is_rooted( $tr2 );
3141        $tr2->[2] = undef if $tr2;                   # undef root branch length
3142        $tr2;
3143    }
3144    
3145    
3146    sub subtree1
3147    {
3148        my ( $tr, $keep ) = @_;
3149        my @desc1 = newick_desc_list( $tr );
3150    
3151        #  Is this a tip, and is it in the keep list?
3152    
3153        if ( @desc1 < 1 ) {
3154            return ( $keep->{ newick_lbl( $tr ) } ) ? $tr : undef;
3155        }
3156    
3157        #  Internal node: analyze the descendants:
3158    
3159        my @desc2 = ();
3160        foreach ( @desc1 ) {
3161            my $desc = subtree1( $_, $keep );
3162            if ( $desc && @$desc ) { push @desc2, $desc }
3163        }
3164    
3165        if ( @desc2 == 0 ) { return undef }
3166        if ( @desc2 >  1 ) { return [ \@desc2, @$tr[ 1 .. @$tr - 1 ] ] }
3167    
3168        #  Exactly 1 descendant
3169    
3170        my $desc = $desc2[ 0 ];
3171        my @nn = ( $desc->[0],
3172                   $desc->[1] ? $desc->[1] : $tr->[1],
3173                   defined( $tr->[2] ) ? $desc->[2] + $tr->[2] : undef
3174                 );
3175    
3176        #  Merge comments (only recreating the ones that existed):
3177    
3178        if ( $tr->[3] && @{$tr->[3]} || $desc->[3] && @{$desc->[3]} ) {
3179            $nn[3] = [ $tr->[3] ? @{$tr->[3]} : (), $desc->[3] ? @{$desc->[3]} : () ];
3180        }
3181        if ( $tr->[4] && @{$tr->[4]} || $desc->[4] && @{$desc->[4]} ) {
3182            $nn[4] = [ $tr->[4] ? @{$tr->[4]} : (), $desc->[4] ? @{$desc->[4]} : () ];
3183        }
3184        if ( $tr->[5] && @{$tr->[5]} || $desc->[5] && @{$desc->[5]} ) {
3185            $nn[5] = [ $tr->[5] ? @{$tr->[5]} : (), $desc->[5] ? @{$desc->[5]} : () ];
3186        }
3187        if ( $tr->[6] && @{$tr->[6]} || $desc->[6] && @{$desc->[6]} ) {
3188            $nn[6] = [ $tr->[6] ? @{$tr->[6]} : (), $desc->[6] ? @{$desc->[6]} : () ];
3189        }
3190        if ( $tr->[7] && @{$tr->[7]} || $desc->[7] && @{$desc->[7]} ) {
3191            $nn[7] = [ $tr->[7] ? @{$tr->[7]} : (), $desc->[7] ? @{$desc->[7]} : () ];
3192        }
3193    
3194        return \@nn;
3195    }
3196    
3197    
3198    #-------------------------------------------------------------------------------
3199    #  The smallest subtree of rooted tree that includes @tips:
3200    #
3201    #    $node = newick_covering_subtree( $tree,  @tips )
3202    #    $node = newick_covering_subtree( $tree, \@tips )
3203    #
3204    #-------------------------------------------------------------------------------
3205    
3206    sub newick_covering_subtree
3207    {
3208        my $tree = shift;
3209        my %tips = map { $_ => 1 } ( ( ref( $_[0] ) eq 'ARRAY' ) ? @{ $_[0] } : @_ );
3210    
3211        #  Return smallest covering node, if any:
3212    
3213        ( newick_covering_subtree( $tree, \%tips ) )[ 0 ];
3214    }
3215    
3216    
3217    sub newick_covering_subtree_1
3218    {
3219        my ( $node, $tips ) = @_;
3220        my $n_cover = 0;
3221        my @desc = newick_desc_list( $node );
3222        if ( @desc )
3223        {
3224            foreach ( @desc )
3225            {
3226                my ( $subtree, $n ) = newick_covering_subtree_1( $_, $tips );
3227                return ( $subtree, $n ) if $subtree;
3228                $n_cover += $n;
3229            }
3230        }
3231        elsif ( $tips->{ newick_lbl( $node ) } )
3232        {
3233            $n_cover++;
3234        }
3235    
3236        #  If all tips are covered, return node
3237    
3238        ( $n_cover == keys %$tips ) ? ( $node, $n_cover ) : ( undef, $n_cover );
3239    }
3240    
3241    
3242    #===============================================================================
3243    #
3244    #  Representative subtrees
3245    #
3246    #===============================================================================
3247    #  Find subtree of size n representating vicinity of the root:
3248    #
3249    #   $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
3250    #   $subtree = root_neighborhood_representative_tree( $tree, $n )
3251    #
3252    #  Note that if $tree is rooted, then the subtree will also be.  This can have
3253    #  consequences on downstream programs.
3254    #-------------------------------------------------------------------------------
3255    sub root_neighborhood_representative_tree
3256    {
3257        my ( $tree, $n, $tip_priority ) = @_;
3258        array_ref( $tree ) && ( $n >= 2 ) or return undef;
3259        if ( newick_tip_count( $tree ) <= $n ) { return $tree }
3260    
3261        $tip_priority ||= default_tip_priority( $tree );
3262        my @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
3263                   root_proximal_newick_subtrees( $tree, $n );
3264    
3265        newick_subtree( copy_newick_tree( $tree ), \@tips );
3266    }
3267    
3268    
3269    #-------------------------------------------------------------------------------
3270    #  Find n tips to represent tree lineages in vicinity of another tip.
3271    #  Default tip priority is short total branch length.
3272    #
3273    #  \@tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
3274    #   @tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
3275    #  \@tips = root_neighborhood_representative_tips( $tree, $n )
3276    #   @tips = root_neighborhood_representative_tips( $tree, $n )
3277    #
3278    #-------------------------------------------------------------------------------
3279    sub root_neighborhood_representative_tips
3280    {
3281        my ( $tree, $n, $tip_priority ) = @_;
3282        array_ref( $tree ) && ( $n >= 2 ) or return undef;
3283    
3284        my @tips;
3285        if ( newick_tip_count( $tree ) <= $n )
3286        {
3287            @tips = newick_tip_list( $tree );
3288        }
3289        else
3290        {
3291            $tip_priority ||= default_tip_priority( $tree );
3292            @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
3293                    root_proximal_newick_subtrees( $tree, $n );
3294        }
3295    
3296        wantarray ? @tips : \@tips;
3297    }
3298    
3299    
3300    #-------------------------------------------------------------------------------
3301    #  Find subtree of size n representating vicinity of a tip:
3302    #
3303    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
3304    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
3305    #
3306    #-------------------------------------------------------------------------------
3307    sub tip_neighborhood_representative_tree
3308    {
3309        my ( $tree, $tip, $n, $tip_priority ) = @_;
3310        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
3311        newick_tip_in_tree( $tree, $tip ) or return undef;
3312    
3313        my $tree1 = copy_newick_tree( $tree );
3314        if ( newick_tip_count( $tree1 ) - 1 <= $n )
3315        {
3316            return prune_from_newick( $tree1, $tip )
3317        }
3318    
3319        $tree1 = reroot_newick_to_tip( $tree1, $tip );
3320        $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
3321        my @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
3322        newick_subtree( copy_newick_tree( $tree ), \@tips );
3323    }
3324    
3325    
3326    #-------------------------------------------------------------------------------
3327    #  Find n tips to represent tree lineages in vicinity of another tip.
3328    #  Default tip priority is short total branch length.
3329    #
3330    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
3331    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
3332    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
3333    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
3334    #
3335    #-------------------------------------------------------------------------------
3336    sub tip_neighborhood_representative_tips
3337    {
3338        my ( $tree, $tip, $n, $tip_priority ) = @_;
3339        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
3340        newick_tip_in_tree( $tree, $tip ) or return undef;
3341    
3342        my @tips = newick_tip_list( $tree );
3343        if ( newick_tip_count( $tree ) - 1 <= $n )
3344        {
3345            @tips = grep { $_ ne $tip } @tips;
3346        }
3347        else
3348        {
3349            my $tree1 = copy_newick_tree( $tree );
3350            $tree1 = reroot_newick_to_tip( $tree1, $tip );
3351            $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
3352            @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
3353        }
3354    
3355        wantarray ? @tips : \@tips;
3356    }
3357    
3358    
3359    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3360    #  Anonymous hash of the negative distance from root to each tip:
3361    #
3362    #   \%tip_priority = default_tip_priority( $tree )
3363    #
3364    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3365    sub default_tip_priority
3366    {
3367        my ( $tree ) = @_;
3368        my $tip_distances = newick_tip_distances( $tree ) || {};
3369        return { map { $_ => -$tip_distances->{$_} } keys %$tip_distances };
3370    }
3371    
3372    
3373    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3374    #  Select a tip from a subtree base on a priority value:
3375    #
3376    #    $tip = representative_tip_of_newick_node( $node, \%tip_priority )
3377    #
3378    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3379    sub representative_tip_of_newick_node
3380    {
3381        my ( $node, $tip_priority ) = @_;
3382        my ( $tip ) = sort { $b->[1] <=> $a->[1] }   # The best
3383                      map  { [ $_, $tip_priority->{ $_ } ] }
3384                      newick_tip_list( $node );
3385        $tip->[0];                                   # Label from label-priority pair
3386    }
3387    
3388    
3389    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3390    #  Find n subtrees focused around the root of a tree.  Typically each will
3391    #  then be reduced to a single tip to make a representative tree:
3392    #
3393    #   @subtrees = root_proximal_newick_subtrees( $tree, $n )
3394    #
3395    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3396    sub root_proximal_newick_subtrees
3397    {
3398        my ( $tree, $n ) = @_;
3399        my $node_start_end = newick_branch_intervals( $tree );
3400        n_representative_branches( $n, $node_start_end );
3401    }
3402    
3403    
3404    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3405    #   @node_start_end = newick_branch_intervals( $tree )
3406    #  \@node_start_end = newick_branch_intervals( $tree )
3407    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3408    sub newick_branch_intervals
3409    {
3410        my ( $node, $parent_x ) = @_;
3411        $parent_x ||= 0;
3412        my ( $desc, undef, $dx ) = @$node;
3413        my $x = $parent_x + $dx;
3414        my $interval = [ $node, $parent_x, $desc && @$desc ? $x : 1e100 ];
3415        my @intervals = ( $interval,
3416                          map { &newick_branch_intervals( $_, $x ) } @$desc
3417                        );
3418        return wantarray ? @intervals : \@intervals;
3419    }
3420    
3421    
3422    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3423    #   @ids = n_representative_branches( $n,  @id_start_end )
3424    #   @ids = n_representative_branches( $n, \@id_start_end )
3425    #  \@ids = n_representative_branches( $n,  @id_start_end )
3426    #  \@ids = n_representative_branches( $n, \@id_start_end )
3427    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3428    sub n_representative_branches
3429    {
3430        my $n = shift;
3431        #  Sort intervals by start point:
3432        my @unprocessed = sort { $a->[1] <=> $b->[1] }
3433                          ( @_ == 1 ) ? @{ $_[0] } : @_;
3434        my @active = ();
3435        my ( $interval, $current_point );
3436        foreach $interval ( @unprocessed )
3437        {
3438            $current_point = $interval->[1];
3439            #  Filter out intervals that have ended.  This is N**2 in the number
3440            #  of representatives.  Fixing this would require maintaining a sorted
3441            #  active list.
3442            @active = grep { $_->[2] > $current_point } @active;
3443            push @active, $interval;
3444            last if ( @active >= $n );
3445        }
3446    
3447        my @ids = map { $_->[0] } @active;
3448        return wantarray() ? @ids : \@ids;
3449    }
3450    
3451    
3452    #===============================================================================
3453    #  Random trees
3454    #===============================================================================
3455    #
3456    #   $tree = random_equibranch_tree(  @tips, \%options )
3457    #   $tree = random_equibranch_tree( \@tips, \%options )
3458    #   $tree = random_equibranch_tree(  @tips )
3459    #   $tree = random_equibranch_tree( \@tips )
3460    #
3461    #  Options:
3462    #
3463    #     length => $branch_length   # D = 1
3464    #
3465    #-------------------------------------------------------------------------------
3466    sub random_equibranch_tree
3467    {
3468        my $opts = $_[ 0] && ref $_[ 0] eq 'HASH' ? shift
3469                 : $_[-1] && ref $_[-1] eq 'HASH' ? pop
3470                 :                                  {};
3471        return undef if ! defined $_[0];
3472    
3473        my @tips = ref $_[0] ? @{ $_[0] } : @_;
3474        return undef if @tips < 2;
3475    
3476        my $len = $opts->{ length } ||= 1;
3477    
3478        if ( @tips == 2 )
3479        {
3480            return [ [ map { [ [], $_, $len ] } @tips ], undef, 0 ];
3481        }
3482    
3483        my $tree = [ [ ], undef, 0 ];
3484    
3485        my @links;  # \$anc_dl[i], i.e. a reference to an element in a descendent list
3486    
3487        my $anc_dl = $tree->[0];
3488        foreach my $tip ( splice( @tips, 0, 3 ) )
3489        {
3490            my $node = [ [], $tip, $len ];
3491            push @$anc_dl, $node;
3492            push @links, \$anc_dl->[-1];  #  Ref to the just added descendent list entry
3493      }      }
3494    
3495      else {      foreach my $tip ( @tips )
3496          return undef;      {
3497            my $link    = $links[ int( rand( scalar @links ) ) ];
3498            my $newtip  = [ [], $tip, $len ];
3499            my $new_dl  = [ $$link, $newtip ];
3500            my $newnode = [ $new_dl, undef, $len ];
3501            $$link = $newnode;
3502            push @links, \$new_dl->[0], \$new_dl->[1]
3503      }      }
3504    
3505      return $tr;      return $tree;
3506  }  }
3507    
3508    
3509  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
 #  Produce a subtree with the desired tips:  
3510  #  #
3511  #     Except for (some) tip nodes, the tree produced is a copy.  #   $tree = random_ultrametric_tree(  @tips, \%options )
3512  #     There is no check that requested tips exist.  #   $tree = random_ultrametric_tree( \@tips, \%options )
3513    #   $tree = random_ultrametric_tree(  @tips )
3514    #   $tree = random_ultrametric_tree( \@tips )
3515    #
3516    #  Options:
3517    #
3518    #     depth => $root_to_tip_dist   # D = 1
3519  #  #
 #  $newtree = newick_subtree( $tree,  @tips )  
 #  $newtree = newick_subtree( $tree, \@tips )  
3520  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
3521  sub newick_subtree {  sub random_ultrametric_tree
3522      my ( $tr, @tips ) = @_;  {
3523      if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }      my $opts = $_[ 0] && ref $_[ 0] eq 'HASH' ? shift
3524                 : $_[-1] && ref $_[-1] eq 'HASH' ? pop
3525                 :                                  {};
3526        return undef if ! defined $_[0];
3527    
3528      if ( @tips < 2 ) { return undef }      my @tips = ref $_[0] ? @{ $_[0] } : @_;
3529      my $was_rooted = newick_is_rooted( $tr );      return undef if @tips < 2;
     my $keephash = { map { ( $_, 1 ) } @tips };  
     my $tr2 = subtree1( $tr, $keephash );  
     $tr2 = uproot_newick( $tr2 ) if ! $was_rooted && newick_is_rooted( $tr2 );  
     $tr2->[2] = undef if $tr2;                   # undef root branch length  
     $tr2;  
 }  
3530    
3531        my $d2tip = $opts->{ depth } ||= 1;
3532    
3533  sub subtree1 {      #  Random tip addition order (for rooted tree it matters):
     my ( $tr, $keep ) = @_;  
     my @desc1 = newick_desc_list( $tr );  
3534    
3535      #  Is this a tip, and is it in the keep list?      @tips = sort { rand() <=> 0.5 } @tips;
3536        my $tree = [ [ ], undef, 0 ];
3537    
3538      if ( @desc1 < 1 ) {      my $subtree_size = { $tree => 0 };  # total branch length of each subtree
3539          return ( $keep->{ newick_lbl( $tr ) } ) ? $tr : undef;  
3540        #  We start with root bifurcation:
3541    
3542        foreach my $tip ( splice( @tips, 0, 2 ) )
3543        {
3544            my $node = [ [], $tip, $d2tip ];
3545            push @{ $tree->[0] }, $node;
3546            $subtree_size->{ $node }  = $d2tip;
3547            $subtree_size->{ $tree } += $d2tip;
3548      }      }
3549    
3550      #  Internal node: analyze the descendants:      #  Add each remaining tip at $pos, measured along the contour length
3551        #  of the tree (with no retracing along branches).
3552    
3553      my @desc2 = ();      foreach my $tip ( @tips )
3554      foreach ( @desc1 ) {      {
3555          my $desc = subtree1( $_, $keep );          my $pos = rand( $subtree_size->{ $tree } );
3556          if ( $desc && @$desc ) { push @desc2, $desc }          random_add_to_ultrametric_tree( $tree, $tip, $subtree_size, $pos, $d2tip );
3557      }      }
3558    
3559      if ( @desc2 == 0 ) { return undef }      return $tree;
3560      if ( @desc2 >  1 ) { return [ \@desc2, @$tr[ 1 .. @$tr - 1 ] ] }  }
3561    
     #  Exactly 1 descendant  
3562    
3563      my $desc = $desc2[ 0 ];  sub random_add_to_ultrametric_tree
3564      my @nn = ( $desc->[0],  {
3565                 $desc->[1] ? $desc->[1] : $tr->[1],      my ( $node, $tip, $subtree_size, $pos, $d2tip ) = @_;
3566                 defined( $tr->[2] ) ? $desc->[2] + $tr->[2] : undef      $node && $node->[0] && ref $node->[0] eq 'ARRAY' or die "Bad tree node passed to random_add_to_ultrametric_tree().\n";
              );  
3567    
3568      #  Merge comments (only recreating the ones that existed):      # Find the descendent line that it goes in:
3569    
3570      if ( $tr->[3] && @{$tr->[3]} || $desc->[3] && @{$desc->[3]} ) {      my $i;
3571          $nn[3] = [ $tr->[3] ? @{$tr->[3]} : (), $desc->[3] ? @{$desc->[3]} : () ];      my $dl = $node->[0];
3572      }      my $size0 = $subtree_size->{ $dl->[0] };
3573      if ( $tr->[4] && @{$tr->[4]} || $desc->[4] && @{$desc->[4]} ) {      if ( $size0 > $pos ) { $i = 0 } else { $i = 1; $pos -= $size0 }
3574          $nn[4] = [ $tr->[4] ? @{$tr->[4]} : (), $desc->[4] ? @{$desc->[4]} : () ];      my $desc = $dl->[$i];
3575      }  
3576      if ( $tr->[5] && @{$tr->[5]} || $desc->[5] && @{$desc->[5]} ) {      # Does it go within the subtree, or the branch to the subtree?
3577          $nn[5] = [ $tr->[5] ? @{$tr->[5]} : (), $desc->[5] ? @{$desc->[5]} : () ];  
3578      }      my $len;
3579      if ( $tr->[6] && @{$tr->[6]} || $desc->[6] && @{$desc->[6]} ) {      my $added;
3580          $nn[6] = [ $tr->[6] ? @{$tr->[6]} : (), $desc->[6] ? @{$desc->[6]} : () ];      if ( ( $len = $desc->[2] ) <= $pos )
3581        {
3582            $added = random_add_to_ultrametric_tree( $desc, $tip, $subtree_size, $pos - $len, $d2tip - $len );
3583      }      }
3584      if ( $tr->[7] && @{$tr->[7]} || $desc->[7] && @{$desc->[7]} ) {      else
3585          $nn[7] = [ $tr->[7] ? @{$tr->[7]} : (), $desc->[7] ? @{$desc->[7]} : () ];      {
3586            # If not in subtree, then it goes in the branch to the descendent node
3587            #
3588            #     ----- node  ------------       node
3589            #       ^   /  \       ^             /  \
3590            #       |       \      | pos             \l1
3591            #       |        \     v                  \
3592            #       |      len\ ----------         newnode
3593            #       |          \                     /  \ l2
3594            # d2tip |           \                   /    \
3595            #       |           desc               /     desc
3596            #       |           /  \            l3/      /  \
3597            #       |          .    .            /      .    .
3598            #       v         .      .          /      .      .
3599            #     -----      .        .     newtip    .        .
3600    
3601            my $l1      = $pos;
3602            my $l2      = $len   - $pos;
3603            my $l3      = $d2tip - $pos;
3604            my $newtip  = [ [], $tip, $l3 ];
3605            my $newnode = [ [ $desc, $newtip ], undef, $l1 ];
3606            $dl->[$i]   = $newnode;
3607            $subtree_size->{ $newtip  } = $l3;
3608            $subtree_size->{ $newnode } = $subtree_size->{ $desc } + $l3;
3609            $desc->[2] = $l2;
3610            $subtree_size->{ $desc } -= $l1;
3611            $added = $l3;
3612      }      }
3613    
3614      return \@nn;      #  New branch was inserted below this point:
3615    
3616        $subtree_size->{ $node } += $added;
3617        return $added;
3618  }  }
3619    
3620    
3621    
3622  #===============================================================================  #===============================================================================
3623  #  #
3624  #  Tree writing and reading  #  Tree writing and reading
3625  #  #
3626  #===============================================================================  #===============================================================================
3627    #
3628  #  writeNewickTree( $tree )  #  writeNewickTree( $tree )
3629  #  writeNewickTree( $tree, $file )  #  writeNewickTree( $tree, $file )
3630  #  writeNewickTree( $tree, \*FH )  #  writeNewickTree( $tree, \*FH )
3631    #
3632  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
3633  sub writeNewickTree {  sub writeNewickTree
3634    {
3635      my ( $tree, $file ) = @_;      my ( $tree, $file ) = @_;
3636      my ( $fh, $close ) = open_output( $file );      my ( $fh, $close ) = open_output( $file );
3637      $fh or return;      $fh or return;
# Line 2413  Line 3649 
3649  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
3650  #  $treestring = strNewickTree( $tree )  #  $treestring = strNewickTree( $tree )
3651  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
3652  sub strNewickTree {  sub strNewickTree
3653    {
3654      my $node = shift @_;      my $node = shift @_;
3655      strNewickSubtree( $node, "" ) . ";";      strNewickSubtree( $node, "" ) . ";";
3656  }  }
# Line 2422  Line 3659 
3659  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3660  #  $string = strNewickSubtree( $node, $prefix )  #  $string = strNewickSubtree( $node, $prefix )
3661  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3662  sub strNewickSubtree {  sub strNewickSubtree
3663    {
3664      my ( $node, $prefix ) = @_;      my ( $node, $prefix ) = @_;
3665      my  $s;      my  $s;
3666    
# Line 2440  Line 3678 
3678          $prefix = " ";          $prefix = " ";
3679      }      }
3680    
3681      if ( defined( newick_lbl( $node ) ) && newick_lbl( $node ) ) {      if ( node_has_lbl( $node ) ) {
3682          $s .= $prefix          $s .= $prefix
3683             .  q_newick_lbl( $node )             .  q_newick_lbl( $node )
3684             .  strNewickComments( newick_c3( $node ), " " );             .  strNewickComments( newick_c3( $node ), " " );
# Line 2460  Line 3698 
3698  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3699  #  $string = strNewickComments( $clist, $prefix )  #  $string = strNewickComments( $clist, $prefix )
3700  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3701  sub strNewickComments {  sub strNewickComments
3702    {
3703      my ( $clist, $prefix ) = @_;      my ( $clist, $prefix ) = @_;
3704      array_ref( $clist ) && ( @$clist > 0 ) || return  "";      array_ref( $clist ) && ( @$clist > 0 ) || return  "";
3705      $prefix . "[" . join( "] [", @$clist ) . "]";      $prefix . "[" . join( "] [", @$clist ) . "]";
# Line 2470  Line 3709 
3709  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3710  #  $quoted_label = q_newick_lbl( $label )  #  $quoted_label = q_newick_lbl( $label )
3711  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3712  sub q_newick_lbl {  sub q_newick_lbl
3713      my $lbl = newick_lbl( $_[0] );  {
3714      defined( $lbl ) && ( $lbl ne "" ) || return undef;      node_has_lbl( $_[0] ) || return undef;
3715    
3716        my $lbl = newick_lbl( $_[0] );
3717      if ( $lbl =~ m/^[^][()_:;,]+$/        #  Anything but []()_:;,      if ( $lbl =~ m/^[^][()_:;,]+$/        #  Anything but []()_:;,
3718        && $lbl !~ m/^'/  ) {               #     and does not start with '        && $lbl !~ m/^'/  ) {               #     and does not start with '
3719          $lbl =~ s/ /_/g;                  #  Recode blanks as _          $lbl =~ s/ /_/g;                  #  Recode blanks as _
# Line 2488  Line 3728 
3728    
3729    
3730  #===============================================================================  #===============================================================================
3731    #
3732  #  $treestring = formatNewickTree( $tree )  #  $treestring = formatNewickTree( $tree )
3733    #
3734  #===============================================================================  #===============================================================================
3735  sub formatNewickTree {  sub formatNewickTree
3736    {
3737      formatNewickSubtree( $_[0], "", "" ) . ";";      formatNewickSubtree( $_[0], "", "" ) . ";";
3738  }  }
3739    
# Line 2498  Line 3741 
3741  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3742  #  $string = formatNewickSubtree( $node, $prefix, $indent )  #  $string = formatNewickSubtree( $node, $prefix, $indent )
3743  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3744  sub formatNewickSubtree {  sub formatNewickSubtree
3745    {
3746      my ( $node, $prefix, $indent ) = @_;      my ( $node, $prefix, $indent ) = @_;
3747      my  $s;      my  $s;
3748    
# Line 2515  Line 3759 
3759          $prefix = " ";          $prefix = " ";
3760      }      }
3761    
3762      if ( defined( newick_lbl( $node ) ) && newick_lbl( $node ) ) {      if ( node_has_lbl( $node ) ) {
3763          $s .= $prefix          $s .= $prefix
3764             .  q_newick_lbl( $node )             .  q_newick_lbl( $node )
3765             .  formatNewickComments( newick_c3( $node ), " ", $indent );             .  formatNewickComments( newick_c3( $node ), " ", $indent );
# Line 2535  Line 3779 
3779  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3780  #  $string = formatNewickComments( $clist, $prefix, $indent )  #  $string = formatNewickComments( $clist, $prefix, $indent )
3781  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3782  sub formatNewickComments {  sub formatNewickComments
3783    {
3784      my ( $clist, $prefix, $indent ) = @_;      my ( $clist, $prefix, $indent ) = @_;
3785      array_ref( $clist ) && @$clist || return  "";      array_ref( $clist ) && @$clist || return  "";
3786      $prefix . "[" . join( "] [", @$clist ) . "]";      $prefix . "[" . join( "] [", @$clist ) . "]";
# Line 2543  Line 3788 
3788    
3789    
3790  #===============================================================================  #===============================================================================
3791    #
3792  #  $tree  = read_newick_tree( $file )  # reads to a semicolon  #  $tree  = read_newick_tree( $file )  # reads to a semicolon
3793  #  @trees = read_newick_trees( $file ) # reads to end of file  #  @trees = read_newick_trees( $file ) # reads to end of file
3794    #
3795  #===============================================================================  #===============================================================================
3796    
3797  sub read_newick_tree  sub read_newick_tree
# Line 2553  Line 3800 
3800      my ( $fh, $close ) = open_input( $file );      my ( $fh, $close ) = open_input( $file );
3801      my $tree;      my $tree;
3802      my @lines = ();      my @lines = ();
3803      while ( defined( $_ = <$fh> ) )      foreach ( <$fh> )
3804      {      {
3805          chomp;          chomp;
3806          push @lines, $_;          push @lines, $_;
# Line 2575  Line 3822 
3822      my ( $fh, $close ) = open_input( $file );      my ( $fh, $close ) = open_input( $file );
3823      my @trees = ();      my @trees = ();
3824      my @lines = ();      my @lines = ();
3825      while ( defined( $_ = <$fh> ) )      foreach ( <$fh> )
3826      {      {
3827          chomp;          chomp;
3828          push @lines, $_;          push @lines, $_;
# Line 2595  Line 3842 
3842  #  Tree reader adapted from the C language reader in fastDNAml  #  Tree reader adapted from the C language reader in fastDNAml
3843  #  #
3844  #  $tree = parse_newick_tree_str( $string )  #  $tree = parse_newick_tree_str( $string )
3845    #
3846  #===============================================================================  #===============================================================================
3847  sub parse_newick_tree_str {  sub parse_newick_tree_str
3848    {
3849      my $s = shift @_;      my $s = shift @_;
3850    
3851      my ( $ind, $rootnode ) = parse_newick_subtree( $s, 0 );      my ( $ind, $rootnode ) = parse_newick_subtree( $s, 0 );
# Line 2608  Line 3857 
3857  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3858  #  Read a subtrees recursively (everything of tree but a semicolon)  #  Read a subtrees recursively (everything of tree but a semicolon)
3859  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3860  sub parse_newick_subtree {  sub parse_newick_subtree
3861    {
3862      my ( $s, $ind ) = @_;      my ( $s, $ind ) = @_;
3863    
3864      my $newnode = [];      my $newnode = [];
# Line 2662  Line 3912 
3912  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3913  #  Read a Newick tree label  #  Read a Newick tree label
3914  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3915  sub parseTreeNodeLabel {  #  Empty string is permitted  sub parseTreeNodeLabel
3916    {  #  Empty string is permitted
3917      my ( $s, $ind ) = @_;      my ( $s, $ind ) = @_;
3918      my ( $lbl, $c );      my ( $lbl, $c );
3919    
# Line 2697  Line 3948 
3948  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3949  #  Read a Newick tree branch length  #  Read a Newick tree branch length
3950  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3951  sub parseBranchLength {  sub parseBranchLength
3952    {
3953      my ( $s, $ind ) = @_;      my ( $s, $ind ) = @_;
3954    
3955      my $c = substr( $s, $ind, 1 );      my $c = substr( $s, $ind, 1 );
# Line 2751  Line 4003 
4003  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4004  #  ( $index, /@commentlist ) = getNextTreeChar( $string, $index )  #  ( $index, /@commentlist ) = getNextTreeChar( $string, $index )
4005  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4006  sub getNextTreeChar {       #  Move to next nonblank, noncomment character  sub getNextTreeChar
4007    {       #  Move to next nonblank, noncomment character
4008      my ( $s, $ind ) = @_;      my ( $s, $ind ) = @_;
4009    
4010      my @clist = ();      my @clist = ();
# Line 2762  Line 4015 
4015      #  Loop while it is a comment:      #  Loop while it is a comment:
4016      while ( substr( $s, $ind, 1 ) eq "[" ) {      while ( substr( $s, $ind, 1 ) eq "[" ) {
4017          $ind++;          $ind++;
4018            my $depth = 1;
4019            my $ind2  = $ind;
4020    
4021          #  Find end          #  Find end
4022          if ( substr( $s, $ind ) !~ /^([^]]*)\]/ ) {          while ( $depth > 0 )
4023            {
4024                if ( substr( $s, $ind2 ) =~ /^([^][]*\[)/ )     # nested [ ... ]
4025                {
4026                    $ind2 += length( $1 );  #  Points at char just past [
4027                    $depth++;               #  If nested comments are allowed
4028                }
4029                elsif ( substr( $s, $ind2 ) =~ /^([^][]*\])/ )  # close bracket
4030                {
4031                    $ind2 += length( $1 );  #  Points at char just past ]
4032                    $depth--;
4033                }
4034                else
4035                {
4036              treeParseError( "comment missing closing bracket '["              treeParseError( "comment missing closing bracket '["
4037                             . substr( $s, $ind ) . "'" )                             . substr( $s, $ind ) . "'" )
4038          }          }
4039          my $comment = $1;          }
4040    
4041          #  Save if it includes any "text"          my $comment = substr( $s, $ind, $ind2-$ind-1 );
4042          if ( $comment =~ m/\S/ ) { push @clist, $comment }          if ( $comment =~ m/\S/ ) { push @clist, $comment }
4043    
4044          $ind += length( $comment ) + 1;     #  Comment plus closing bracket          $ind = $ind2;
4045    
4046          #  Skip white space          #  Skip white space
4047          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }
# Line 2792  Line 4060 
4060  #===============================================================================  #===============================================================================
4061  #  Make a printer plot of a tree:  #  Make a printer plot of a tree:
4062  #  #
4063  #     $node   newick tree root node  #  printer_plot_newick( $node, $file, $width, $min_dx, $dy )
4064  #     $file   undef (= \*STDOUT), \*STDOUT, \*STDERR, or a file name.  #  printer_plot_newick( $node, $file, \%options )
4065  #     $width  the approximate characters for the tree without labels  #
4066  #     $min_dx the minimum horizontal branch length  #     $node   # newick tree root node
4067  #     $dy     the vertical space per taxon  #     $file   # undef = \*STDOUT, \*FH, or a file name.
4068    #     $width  # the approximate characters for the tree without labels (D = 68)
4069    #     $min_dx # the minimum horizontal branch length (D = 2)
4070    #     $dy     # the vertical space per taxon (D = 1, most compressed)
4071    #
4072    #  Options:
4073    #
4074    #    dy     => nat_number    # the vertical space per taxon
4075    #    chars  => key           # line drawing character set:
4076    #                            #     html_unicode
4077    #                            #     text (default)
4078    #    min_dx => whole_number  # the minimum horizontal branch length
4079    #    width  => whole_number  # approximate tree width without labels
4080  #  #
 #  printer_plot_newick( $node, $file (D=\*STDOUT), $width (D=68), $min_dx (D=2), $dy (D=1) )  
4081  #===============================================================================  #===============================================================================
4082  sub printer_plot_newick {  sub printer_plot_newick
4083      my ( $node, $file, $width, $min_dx, $dy ) = @_;  {
4084        my ( $node, $file, @opts ) = @_;
4085    
4086      my ( $fh, $close ) = open_output( $file );      my ( $fh, $close ) = open_output( $file );
4087      $fh or return;      $fh or return;
4088    
4089      print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";      my $html = $opts[0] && ref($opts[0]) eq 'HASH'
4090                            && $opts[0]->{ chars }
4091                            && $opts[0]->{ chars } =~ /html/;
4092        print $fh '<PRE>' if $html;
4093        print $fh join( "\n", text_plot_newick( $node, @opts ) ), "\n";
4094        print $fh "</PRE>\n" if $html;
4095    
4096      if ( $close ) { close $fh }      if ( $close ) { close $fh }
4097  }  }
4098    
4099    
4100  #===============================================================================  #===============================================================================
4101    #  Character sets for printer plot trees:
4102    #-------------------------------------------------------------------------------
4103    
4104    my %char_set =
4105      ( text1     => { space  => ' ',
4106                       horiz  => '-',
4107                       vert   => '|',
4108                       el_d_r => '/',
4109                       el_u_r => '\\',
4110                       el_d_l => '\\',
4111                       el_u_l => '/',
4112                       tee_l  => '+',
4113                       tee_r  => '+',
4114                       tee_u  => '+',
4115                       tee_d  => '+',
4116                       half_l => '-',
4117                       half_r => '-',
4118                       half_u => '|',
4119                       half_d => '|',
4120                       cross  => '+',
4121                     },
4122        text2     => { space  => ' ',
4123                       horiz  => '-',
4124                       vert   => '|',
4125                       el_d_r => '+',
4126                       el_u_r => '+',
4127                       el_d_l => '+',
4128                       el_u_l => '+',
4129                       tee_l  => '+',
4130                       tee_r  => '+',
4131                       tee_u  => '+',
4132                       tee_d  => '+',
4133                       half_l => '-',
4134                       half_r => '-',
4135                       half_u => '|',
4136                       half_d => '|',
4137                       cross  => '+',
4138                     },
4139        html_box  => { space  => '&nbsp;',
4140                       horiz  => '&#9472;',
4141                       vert   => '&#9474;',
4142                       el_d_r => '&#9484;',
4143                       el_u_r => '&#9492;',
4144                       el_d_l => '&#9488;',
4145                       el_u_l => '&#9496;',
4146                       tee_l  => '&#9508;',
4147                       tee_r  => '&#9500;',
4148                       tee_u  => '&#9524;',
4149                       tee_d  => '&#9516;',
4150                       half_l => '&#9588;',
4151                       half_r => '&#9590;',
4152                       half_u => '&#9589;',
4153                       half_d => '&#9591;',
4154                       cross  => '&#9532;',
4155                     },
4156        utf8_box  => { space  => ' ',
4157                       horiz  => chr(226) . chr(148) . chr(128),
4158                       vert   => chr(226) . chr(148) . chr(130),
4159                       el_d_r => chr(226) . chr(148) . chr(140),
4160                       el_u_r => chr(226) . chr(148) . chr(148),
4161                       el_d_l => chr(226) . chr(148) . chr(144),
4162                       el_u_l => chr(226) . chr(148) . chr(152),
4163                       tee_l  => chr(226) . chr(148) . chr(164),
4164                       tee_r  => chr(226) . chr(148) . chr(156),
4165                       tee_u  => chr(226) . chr(148) . chr(180),
4166                       tee_d  => chr(226) . chr(148) . chr(172),
4167                       half_l => chr(226) . chr(149) . chr(180),
4168                       half_r => chr(226) . chr(149) . chr(182),
4169                       half_u => chr(226) . chr(149) . chr(181),
4170                       half_d => chr(226) . chr(149) . chr(183),
4171                       cross  => chr(226) . chr(148) . chr(188),
4172                     },
4173      );
4174    
4175    %{ $char_set{ html1 } } = %{ $char_set{ text1 } };
4176    $char_set{ html1 }->{ space } = '&nbsp;';
4177    
4178    %{ $char_set{ html2 } } = %{ $char_set{ text2 } };
4179    $char_set{ html2 }->{ space } = '&nbsp;';
4180    
4181    #  Define some synonyms
4182    
4183    $char_set{ html } = $char_set{ html_box };
4184    $char_set{ line } = $char_set{ utf8_box };
4185    $char_set{ symb } = $char_set{ utf8_box };
4186    $char_set{ text } = $char_set{ text1 };
4187    $char_set{ utf8 } = $char_set{ utf8_box };
4188    
4189    #  Define tree formats and synonyms
4190    
4191    my %tree_format =
4192        ( text         => 'text',
4193          tree_tab_lbl => 'tree_tab_lbl',
4194          tree_lbl     => 'tree_lbl',
4195          chrlist_lbl  => 'chrlist_lbl',
4196          raw          => 'chrlist_lbl',
4197        );
4198    
4199    #===============================================================================
4200  #  Make a text plot of a tree:  #  Make a text plot of a tree:
4201  #  #
4202  #     $node   newick tree root node  #  @lines = text_plot_newick( $node, $width, $min_dx, $dy )
4203  #     $width  the approximate characters for the tree without labels  #  @lines = text_plot_newick( $node, \%options )
4204  #     $min_dx the minimum horizontal branch length  #
4205  #     $dy     the vertical space per taxon  #     $node   # newick tree root node
4206    #     $width  # the approximate characters for the tree without labels (D = 68)
4207    #     $min_dx # the minimum horizontal branch length (D = 2)
4208    #     $dy     # the vertical space per taxon (D = 1, most compressed)
4209    #
4210    #  Options:
4211    #
4212    #    chars  => keyword       # the output character set for the tree
4213    #    dy     => nat_number    # the vertical space per taxon
4214    #    format => keyword       # output format of each line
4215    #    min_dx => whole_number  # the minimum horizontal branch length
4216    #    width  => whole_number  # approximate tree width without labels
4217    #
4218    #  Character sets:
4219    #
4220    #    html       #  synonym of html1
4221    #    html_box   #  html encoding of unicode box drawing characters
4222    #    html1      #  text1 with nonbreaking spaces
4223    #    html2      #  text2 with nonbreaking spaces
4224    #    line       #  synonym of utf8_box
4225    #    raw        #  pass out the internal representation
4226    #    symb       #  synonym of utf8_box
4227    #    text       #  synonym of text1 (Default)
4228    #    text1      #  ascii characters: - + | / \ and space
4229    #    text2      #  ascii characters: - + | + + and space
4230    #    utf8       #  synonym of utf8_box
4231    #    utf8_box   #  utf8 encoding of unicode box drawing characters
4232    #
4233    #  Formats for row lines:
4234    #
4235    #    text           #    $textstring              # Default
4236    #    tree_tab_lbl   #    $treestr \t $labelstr
4237    #    tree_lbl       # [  $treestr,  $labelstr ]
4238    #    chrlist_lbl    # [ \@treechar, $labelstr ]   # Forced with raw chars
4239    #    raw            #  synonym of chrlist_lbl
4240  #  #
 #  @textlines = text_plot_newick( $node, $width (D=68), $min_dx (D=2), $dy (D=1) )  
4241  #===============================================================================  #===============================================================================
4242  sub text_plot_newick {  sub text_plot_newick
4243      my ( $node, $width, $min_dx, $dy ) = @_;  {
4244        my $node = shift @_;
4245      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";
4246      defined( $min_dx ) and ( $min_dx >=  0 ) or $min_dx =  2;  
4247      defined(     $dy ) and (     $dy >=  1 ) or     $dy =  1;      my ( $opts, $width, $min_dx, $dy, $chars, $fmt );
4248      defined( $width  )                       or  $width = 68;      if ( $_[0] && ref $_[0] eq 'HASH' )
4249        {
4250            $opts = shift;
4251        }
4252        else
4253        {
4254            ( $width, $min_dx, $dy ) = @_;
4255            $opts = {};
4256        }
4257    
4258        $chars = $opts->{ chars } || '';
4259        my $charH;
4260        $charH = $char_set{ $chars } || $char_set{ 'text1' } if ( $chars ne 'raw' );
4261        my $is_box = $charH eq $char_set{ html_box }
4262                  || $charH eq $char_set{ utf8_box }
4263                  || $chars eq 'raw';
4264    
4265        $fmt = ( $chars eq 'raw' ) ? 'chrlist_lbl' : $opts->{ format };
4266        $fmt = $tree_format{ $fmt || '' } || 'text';
4267    
4268        $dy    ||= $opts->{ dy     } ||  1;
4269        $width ||= $opts->{ width  } || 68;
4270        $min_dx  = $opts->{ min_dx } if ( ! defined $min_dx || $min_dx < 0 );
4271        $min_dx  = $is_box ? 1 : 2   if ( ! defined $min_dx || $min_dx < 0 );
4272    
4273        #  Layout the tree:
4274    
4275      $min_dx = int( $min_dx );      $min_dx = int( $min_dx );
4276      $dy     = int( $dy );      $dy     = int( $dy );
# Line 2835  Line 4279 
4279      my $hash = {};      my $hash = {};
4280      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );
4281    
4282      # dump_tree_hash( $node, $hash ); exit;      #  Generate the lines of the tree-one by-one:
   
     #  Generate the lines of the tree one by one:  
4283    
4284      my ( $y1, $y2 ) = @{ $hash->{ $node } };      my ( $y1, $y2 ) = @{ $hash->{ $node } };
4285      map { text_tree_row( $node, $hash, $_, "", "+" ) } ( $y1 .. $y2 );      my @lines;
4286        foreach ( ( $y1 .. $y2 ) )
4287        {
4288            my $line = text_tree_row( $node, $hash, $_, [], 'tee_l', $dy >= 2 );
4289            my $lbl  = '';
4290            if ( @$line )
4291            {
4292                if ( $line->[-1] eq '' ) { pop @$line; $lbl = pop @$line }
4293                #  Translate tree characters
4294                @$line = map { $charH->{ $_ } } @$line if $chars ne 'raw';
4295            }
4296    
4297            # Convert to requested output format:
4298    
4299            push @lines, $fmt eq 'text'         ? join( '', @$line, ( $lbl ? " $lbl" : () ) )
4300                       : $fmt eq 'text_tab_lbl' ? join( '', @$line, "\t", $lbl )
4301                       : $fmt eq 'tree_lbl'     ? [ join( '', @$line ), $lbl ]
4302                       : $fmt eq 'chrlist_lbl'  ? [ $line, $lbl ]
4303                       :                          ();
4304  }  }
4305    
4306        # if ( $cells )
4307        # {
4308        #     my $nmax = 0;
4309        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
4310        #     foreach ( @lines )
4311        #     {
4312        #         @$_ = map { "<TD>$_</TD>" } @$_;
4313        #         my $span = $nmax - @$_ + 1;
4314        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
4315        #     }
4316        # }
4317        # elsif ( $tables )
4318        # {
4319        #     my $nmax = 0;
4320        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
4321        #     foreach ( @lines )
4322        #     {
4323        #         @$_ = map { "<TD>$_</TD>" } @$_;
4324        #         my $span = $nmax - @$_ + 1;
4325        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
4326        #     }
4327        # }
4328    
4329        wantarray ? @lines : \@lines;
4330    }
4331    
4332    
4333  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4334  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy )  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd )
4335  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4336  sub layout_printer_plot {  sub layout_printer_plot
4337      my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy ) = @_;  {
4338        my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd ) = @_;
4339      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";
4340      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";
4341    
4342      my $dx = newick_x( $node );      my $dx = newick_x( $node );
4343      if ( defined( $dx ) ) {      if ( defined( $dx ) ) {
4344          $dx *= $x_scale;          $dx *= $x_scale;
4345          $dx >= $min_dx or $dx = $min_dx;          $dx = $min_dx if $dx < $min_dx;
4346      }      }
4347      else {      else {
4348          $dx = ( $x0 > 0 ) ? $min_dx : 0;          $dx = ( $x0 > 0 ) ? $min_dx : 0;
# Line 2881  Line 4369 
4369          $ymax = $y0;          $ymax = $y0;
4370    
4371          foreach ( @dl ) {          foreach ( @dl ) {
4372              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy );              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy,
4373                                                              ( 2*@ylist < @dl ? 0.5001 : 0.4999 )
4374                                                            );
4375              push @ylist, $yi;              push @ylist, $yi;
4376              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }
4377          }          }
# Line 2891  Line 4381 
4381    
4382          $yn1 = $ylist[ 0];          $yn1 = $ylist[ 0];
4383          $yn2 = $ylist[-1];          $yn2 = $ylist[-1];
4384          $y = int( 0.5 * ( $yn1 + $yn2 ) + 0.4999 );          $y   = int( 0.5 * ( $yn1 + $yn2 ) + ( $yrnd || 0.4999 ) );
4385    
4386            #  Handle special case of internal node label. Put it between subtrees.
4387    
4388            if ( ( $dy >= 2 ) && node_has_lbl( $node ) && ( @dl > 1 ) ) {
4389                #  Find the descendents $i1 and $i2 to put the branch between
4390                my $i2 = 1;
4391                while ( ( $i2+1 < @ylist ) && ( $ylist[$i2] < $y ) ) { $i2++ }
4392                my $i1 = $i2 - 1;
4393                #  Get bottom of subtree1 and top of subtree2:
4394                my $ymax1 = $hash->{ $dl[ $i1 ] }->[ 1 ];
4395                my $ymin2 = $hash->{ $dl[ $i2 ] }->[ 0 ];
4396                #  Midway between bottom of subtree1 and top of subtree2, with
4397                #  preferred rounding direction
4398                $y = int( 0.5 * ( $ymax1 + $ymin2 ) + ( $yrnd || 0.4999 ) );
4399            }
4400      }      }
4401    
4402      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );
# Line 2901  Line 4406 
4406  }  }
4407    
4408    
4409  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #  What symbol do we get if we add a leftward line to some other symbol?
 #  Debug routine  
 #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
 sub dump_tree {  
     my ( $node, $prefix ) = @_;  
     defined( $prefix ) or $prefix = "";  
     print STDERR $prefix, join(", ", @$node), "\n";  
     my @dl = $node->[0] ? @{$node->[0]} : ();  
     foreach ( @dl ) { dump_tree( $_, $prefix . "  " ) }  
     $prefix or print STDERR "\n";  
 }  
   
   
 #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
 #  Debug routine  
 #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
 sub dump_tree_hash {  
     my ( $node, $hash, $prefix ) = @_;  
     defined( $prefix ) or print STDERR "node; [ y1, y2, x0, x, y, yn1, yn2 ]\n" and $prefix = "";  
     print STDERR $prefix, join(", ", @$node), "; ", join(", ", @{ $hash->{ $node } } ), "\n";  
     my @dl = $node->[0] ? @{$node->[0]} : ();  
     foreach (@dl) { dump_tree_hash( $_, $hash, $prefix . "  " ) }  
 }  
4410    
4411    my %with_left_line = ( space  => 'half_l',
4412                           horiz  => 'horiz',
4413                           vert   => 'tee_l',
4414                           el_d_r => 'tee_d',
4415                           el_u_r => 'tee_u',
4416                           el_d_l => 'el_d_l',
4417                           el_u_l => 'el_u_l',
4418                           tee_l  => 'tee_l',
4419                           tee_r  => 'cross',
4420                           tee_u  => 'tee_u',
4421                           tee_d  => 'tee_d',
4422                           half_l => 'half_l',
4423                           half_r => 'horiz',
4424                           half_u => 'el_u_l',
4425                           half_d => 'el_d_l',
4426                           cross  => 'cross',
4427                         );
4428    
4429  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4430  #  $line = text_tree_row( $node, $hash, $row, $line, $symb )  #  Produce a description of one line of a printer plot tree.
4431    #
4432    #  \@line = text_tree_row( $node, $hash, $row, \@line, $symb, $ilbl )
4433    #
4434    #     \@line is the character descriptions accumulated so far, one per array
4435    #          element, except for a label, which can be any number of characters.
4436    #          Labels are followed by an empty string, so if $line->[-1] eq '',
4437    #          then $line->[-2] is a label. The calling program translates the
4438    #          symbol names to output characters.
4439    #
4440    #     \@node is a newick tree node
4441    #     \%hash contains tree layout information
4442    #      $row  is the row number (y value) that we are building
4443    #      $symb is the plot symbol proposed for the current x and y position
4444    #      $ilbl is true if internal node labels are allowed
4445    #
4446  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4447  sub text_tree_row {  sub text_tree_row
4448      my ( $node, $hash, $row, $line, $symb ) = @_;  {
4449        my ( $node, $hash, $row, $line, $symb, $ilbl ) = @_;
4450    
4451      my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };      my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };
4452      if ( $row < $y1 || $row > $y2 ) { return $line }      if ( $row < $y1 || $row > $y2 ) { return $line }
4453    
4454      if ( length( $line ) < $x0 ) { $line .= " " x ( $x0 - length( $line ) ) }      if ( @$line < $x0 ) { push @$line, ('space') x ( $x0 - @$line ) }
4455    
4456      if ( $row == $y ) {      if ( $row == $y ) {
4457          $line = substr( $line, 0, $x0 ) . $symb . (( $x > $x0 ) ? "-" x ($x - $x0) : "");          while ( @$line > $x0 ) { pop @$line }  # Actually 0-1 times
4458            push @$line, $symb,
4459                         ( ( $x > $x0 ) ? ('horiz') x ($x - $x0) : () );
4460      }      }
4461    
4462      elsif ( $row > $yn1 && $row < $yn2 ) {      elsif ( $row > $yn1 && $row < $yn2 ) {
4463          if ( length( $line ) < $x ) { $line .= " " x ( $x - length( $line ) ) . "|" }          if ( @$line < $x ) { push @$line, ('space') x ( $x - @$line ), 'vert' }
4464          else { substr( $line, $x ) = "|" }          else               { $line->[$x] = 'vert' }
4465      }      }
4466    
4467      my @dl = newick_desc_list( $node );      my @dl = newick_desc_list( $node );
4468    
4469      if ( @dl < 1 ) {      if ( @dl < 1 ) {
4470          $line .= " " . $node->[1];          push @$line, ( node_has_lbl( $node ) ? newick_lbl( $node ) : '' ), '';
4471      }      }
4472    
4473      else {      else {
4474          my @list = map { [ $_, "+" ] } @dl;  #  Print symbol for line          my @list = map { [ $_, 'tee_r' ] } @dl;  # Line to the right
4475          $list[ 0]->[1] = "/";          if ( @list > 1 ) { #  Fix top and bottom sympbols
4476          $list[-1]->[1] = "\\";              $list[ 0]->[1] = 'el_d_r';
4477                $list[-1]->[1] = 'el_u_r';
4478            }
4479            elsif ( @list ) {  # Only one descendent
4480                $list[ 0]->[1] = 'half_r';
4481            }
4482          foreach ( @list ) {          foreach ( @list ) {
4483              my ( $n, $s ) = @$_;              my ( $n, $s ) = @$_;
4484              if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {              if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {
4485                  $line = text_tree_row( $n, $hash, $row, $line, $s );                  $line = text_tree_row( $n, $hash, $row, $line, $s, $ilbl );
4486              }              }
4487           }           }
4488    
4489          if ( $row == $y ) { substr( $line, $x, 1 ) = "+" }          if ( $row == $y ) {
4490                $line->[$x] = ( $line->[$x] eq 'horiz' ) ? 'tee_l'
4491                                                         : $with_left_line{ $line->[$x] };
4492                push @$line, newick_lbl( $node), '' if $ilbl && node_has_lbl( $node );
4493            }
4494      }      }
4495    
4496      return $line;      return $line;
4497  }  }
4498    
4499    
4500    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4501    #  Debug routine
4502    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4503    sub dump_tree
4504    {
4505        my ( $node, $prefix ) = @_;
4506        defined( $prefix ) or $prefix = "";
4507        print STDERR $prefix, join(", ", @$node), "\n";
4508        my @dl = $node->[0] ? @{$node->[0]} : ();
4509        foreach ( @dl ) { dump_tree( $_, $prefix . "  " ) }
4510        $prefix or print STDERR "\n";
4511    }
4512    
4513    
4514    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4515    #  Debug routine
4516    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4517    sub dump_tree_hash
4518    {
4519        my ( $node, $hash, $prefix ) = @_;
4520        defined( $prefix ) or print STDERR "node; [ y1, y2, x0, x, y, yn1, yn2 ]\n" and $prefix = "";
4521        print STDERR $prefix, join(", ", @$node), "; ", join(", ", @{ $hash->{ $node } } ), "\n";
4522        my @dl = $node->[0] ? @{$node->[0]} : ();
4523        foreach (@dl) { dump_tree_hash( $_, $hash, $prefix . "  " ) }
4524    }
4525    
4526    
4527  #===============================================================================  #===============================================================================
4528  #  Open an input file stream:  #  Open an input file stream:
4529  #  #
# Line 2983  Line 4536 
4536  {  {
4537      my $file = shift;      my $file = shift;
4538      my $fh;      my $fh;
4539      if    ( ! defined( $file ) )     { return ( \*STDIN ) }      if    ( ! defined $file || $file eq '' ) { return ( \*STDIN ) }
4540      elsif ( ref( $file ) eq 'GLOB' ) { return ( $file   ) }      elsif ( ref( $file ) eq 'GLOB' ) { return ( $file   ) }
4541      elsif ( open( $fh, "<$file" ) )  { return ( $fh, 1  ) } # Need to close      elsif ( open( $fh, "<$file" ) )  { return ( $fh, 1  ) } # Need to close
4542    
# Line 3004  Line 4557 
4557  {  {
4558      my $file = shift;      my $file = shift;
4559      my $fh;      my $fh;
4560      if    ( ! defined( $file ) )     { return ( \*STDOUT ) }      if    ( ! defined $file || $file eq '' ) { return ( \*STDOUT ) }
4561      elsif ( ref( $file ) eq 'GLOB' ) { return ( $file    ) }      elsif ( ref( $file ) eq 'GLOB' ) { return ( $file    ) }
4562      elsif ( ( open $fh, ">$file" ) ) { return ( $fh, 1   ) } # Need to close      elsif ( ( open $fh, ">$file" ) ) { return ( $fh, 1   ) } # Need to close
4563    
# Line 3012  Line 4565 
4565      return undef;      return undef;
4566  }  }
4567    
4568    
4569    #===============================================================================
4570    #  Some subroutines copied from gjolists
4571    #===============================================================================
4572    #  Return the common prefix of two lists:
4573    #
4574    #  @common = common_prefix( \@list1, \@list2 )
4575    #
4576    #-----------------------------------------------------------------------------
4577    sub common_prefix
4578    {
4579        my ($l1, $l2) = @_;
4580        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
4581        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
4582    
4583        my $i = 0;
4584        my $l1_i;
4585        while ( defined( $l1_i = $l1->[$i] ) && $l1_i eq $l2->[$i] ) { $i++ }
4586    
4587        return @$l1[ 0 .. ($i-1) ];  # perl handles negative range
4588    }
4589    
4590    
4591    #-----------------------------------------------------------------------------
4592    #  Return the unique suffixes of each of two lists:
4593    #
4594    #  ( \@suffix1, \@suffix2 ) = unique_suffixes( \@list1, \@list2 )
4595    #
4596    #-----------------------------------------------------------------------------
4597    sub unique_suffixes
4598    {
4599        my ($l1, $l2) = @_;
4600        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
4601        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
4602    
4603        my $i = 0;
4604        my @l1 = @$l1;
4605        my @l2 = @$l2;
4606        my $l1_i;
4607        while ( defined( $l1_i = $l1[$i] ) && $l1_i eq $l2[$i] ) { $i++ }
4608    
4609        splice @l1, 0, $i;
4610        splice @l2, 0, $i;
4611        return ( \@l1, \@l2 );
4612    }
4613    
4614    
4615    #-------------------------------------------------------------------------------
4616    #  List of values duplicated in a list (stable in order by second occurance):
4617    #
4618    #  @dups = duplicates( @list )
4619    #
4620    #-------------------------------------------------------------------------------
4621    sub duplicates
4622    {
4623        my %cnt = ();
4624        grep { ++$cnt{$_} == 2 } @_;
4625    }
4626    
4627    
4628    #-------------------------------------------------------------------------------
4629    #  Randomize the order of a list:
4630    #
4631    #  @random = random_order( @list )
4632    #
4633    #-------------------------------------------------------------------------------
4634    sub random_order
4635    {
4636        my ( $i, $j );
4637        for ( $i = @_ - 1; $i > 0; $i-- )
4638        {
4639            $j = int( ($i+1) * rand() );
4640            ( $_[$i], $_[$j] ) = ( $_[$j], $_[$i] ); # Interchange i and j
4641        }
4642    
4643       @_;
4644    }
4645    
4646    
4647    #-----------------------------------------------------------------------------
4648    #  Intersection of two or more sets:
4649    #
4650    #  @intersection = intersection( \@set1, \@set2, ... )
4651    #
4652    #-----------------------------------------------------------------------------
4653    sub intersection
4654    {
4655        my $set = shift;
4656        my @intersection = @$set;
4657    
4658        foreach $set ( @_ )
4659        {
4660            my %set = map { $_ => 1 } @$set;
4661            @intersection = grep { exists $set{ $_ } } @intersection;
4662        }
4663    
4664        @intersection;
4665    }
4666    
4667    
4668    #-----------------------------------------------------------------------------
4669    #  Elements in set 1, but not set 2:
4670    #
4671    #  @difference = set_difference( \@set1, \@set2 )
4672    #
4673    #-----------------------------------------------------------------------------
4674    sub set_difference
4675    {
4676        my ($set1, $set2) = @_;
4677        my %set2 = map { $_ => 1 } @$set2;
4678        grep { ! ( exists $set2{$_} ) } @$set1;
4679    }
4680    
4681    
4682  1;  1;

Legend:
Removed from v.1.8  
changed lines
  Added in v.1.25

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3