[Bio] / FigKernelPackages / gjonewicklib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjonewicklib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5, Tue Dec 19 17:32:48 2006 UTC revision 1.22, Fri Aug 27 23:34:33 2010 UTC
# Line 1  Line 1 
1    # This is a SAS component.
2    
3  #  #
4  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2010 University of Chicago and Fellowship
5  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
6  #  #
7  # This file is part of the SEED Toolkit.  # This file is part of the SEED Toolkit.
# Line 44  Line 46 
46  #  #
47  #     $tree = \@rootnode;  #     $tree = \@rootnode;
48  #  #
49  #     @node = ( \@desc,  #  reference to list of descendants  #     $node = [ \@desc,  #  reference to list of descendants
50  #                $label, #  node label  #                $label, #  node label
51  #                $x,     #  branch length  #                $x,     #  branch length
52  #               \@c1,    #  reference to comment list 1  #               \@c1,    #  reference to comment list 1
# Line 52  Line 54 
54  #               \@c3,    #  reference to comment list 3  #               \@c3,    #  reference to comment list 3
55  #               \@c4,    #  reference to comment list 4  #               \@c4,    #  reference to comment list 4
56  #               \@c5     #  reference to comment list 5  #               \@c5     #  reference to comment list 5
57  #             )  #             ]
58  #  #
59  #  At present, no routine tests or enforces the length of the list (a single  #  At present, no routine tests or enforces the length of the list (a single
60  #  element list could be a valid internal node).  #  element list could be a valid internal node).
# Line 63  Line 65 
65  #  time, but is different from the prolog representation.  #  time, but is different from the prolog representation.
66  #  #
67  #  #
68    #  Ross Overbeek has a different tree node structure:
69    #
70    #     $node = [ Label,
71    #               DistanceToParent,
72    #               [ ParentPointer, ChildPointer1, ... ],
73    #               [ Name1\tVal1, Name2\tVal2, ... ]
74    #             ]
75    #
76    #  So:
77    #
78    #===============================================================================
79    #  Tree format interconversion:
80    #===============================================================================
81    #
82    #  $bool      = is_overbeek_tree( $tree )
83    #  $bool      = is_gjonewick_tree( $tree )
84    #
85    #  $gjonewick = overbeek_to_gjonewick( $overbeek )
86    #  $overbeek  = gjonewick_to_overbeek( $gjonewick )
87    #
88  #===============================================================================  #===============================================================================
89  #  Tree data extraction:  #  Tree data extraction:
90  #===============================================================================  #===============================================================================
# Line 90  Line 112 
112  #  set_newick_c4( $noderef, $listref )  #  set_newick_c4( $noderef, $listref )
113  #  set_newick_c5( $noderef, $listref )  #  set_newick_c5( $noderef, $listref )
114  #  set_newick_desc_list( $noderef, @desclist )  #  set_newick_desc_list( $noderef, @desclist )
115  #  set_newick_desc_i( $noderef1, $i, $noderef2)  #  set_newick_desc_i( $noderef1, $i, $noderef2 )  # 1-based numbering
116    #
117    #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid
118  #  #
119  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root
120  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root
# Line 99  Line 123 
123  #  #
124  #  $n       = newick_tip_count( $noderef )  #  $n       = newick_tip_count( $noderef )
125  #  @tiprefs = newick_tip_ref_list( $noderef )  #  @tiprefs = newick_tip_ref_list( $noderef )
126    # \@tiprefs = newick_tip_ref_list( $noderef )
127  #  @tips    = newick_tip_list( $noderef )  #  @tips    = newick_tip_list( $noderef )
128    # \@tips    = newick_tip_list( $noderef )
129    #
130  #  $tipref  = newick_first_tip_ref( $noderef )  #  $tipref  = newick_first_tip_ref( $noderef )
131  #  $tip     = newick_first_tip( $noderef )  #  $tip     = newick_first_tip( $noderef )
132    #
133  #  @tips    = newick_duplicated_tips( $noderef )  #  @tips    = newick_duplicated_tips( $noderef )
134    # \@tips    = newick_duplicated_tips( $noderef )
135    #
136  #  $bool    = newick_tip_in_tree( $noderef, $tipname )  #  $bool    = newick_tip_in_tree( $noderef, $tipname )
137    #
138  #  @tips    = newick_shared_tips( $tree1, $tree2 )  #  @tips    = newick_shared_tips( $tree1, $tree2 )
139    # \@tips    = newick_shared_tips( $tree1, $tree2 )
140  #  #
141  #  $length  = newick_tree_length( $noderef )  #  $length  = newick_tree_length( $noderef )
142    #
143    #  %tip_distances = newick_tip_distances( $noderef )
144    # \%tip_distances = newick_tip_distances( $noderef )
145    #
146  #  $xmax    = newick_max_X( $noderef )  #  $xmax    = newick_max_X( $noderef )
147  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )
148  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )  #  ( $tipname, $xmax ) = newick_most_distant_tip_name( $noderef )
149    #
150    #  Provide a standard name by which two trees can be compared for same topology
151    #
152    #  $stdname = std_tree_name( $tree )
153    #
154    #  Tree tip insertion point (tip is on branch of length x that
155    #  is inserted into branch connecting node1 and node2, a distance
156    #  x1 from node1 and x2 from node2):
157    #
158    #  [ $node1, $x1, $node2, $x2, $x ] = newick_tip_insertion_point( $tree, $tip )
159    #
160    #  Standardized label for a node in terms of intersection of 3 lowest sorting
161    #  tips (sort is lower case):
162    #
163    #  @TipOrTips = std_node_name( $tree, $node )
164  #  #
165  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
166  #  Paths from root of tree:  #  Paths from root of tree:
# Line 140  Line 191 
191  #  $treecopy = copy_newick_tree( $tree )  #  $treecopy = copy_newick_tree( $tree )
192  #  #
193  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
194  #  The following modify the existing tree, and passibly any components of that  #  The following modify the existing tree, and possibly any components of that
195  #  tree that are reached by reference.  If the old version is still needed, copy  #  tree that are reached by reference.  If the old version is still needed, copy
196  #  before modifying.  #  before modifying.
197  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 157  Line 208 
208  #  $n_changed = newick_set_undefined_branches( $node, $x )  #  $n_changed = newick_set_undefined_branches( $node, $x )
209  #  $n_changed = newick_set_all_branches( $node, $x )  #  $n_changed = newick_set_all_branches( $node, $x )
210  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
211    #  $node      = newick_rescale_branches( $node, $factor )
212    #  $node      = newick_modify_branches( $node, \&function )
213    #  $node      = newick_modify_branches( $node, \&function, \@func_parms )
214    #
215    #  Modify comments:
216    #
217    #  $node = newick_strip_comments( $node )
218  #  #
219  #  Modify rooting and/or order:  #  Modify rooting and/or order:
220  #  #
# Line 170  Line 228 
228  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )
229  #  $newtree = reroot_newick_to_node( $tree, @node )  #  $newtree = reroot_newick_to_node( $tree, @node )
230  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
231  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
232    #  $newtree = reroot_newick_to_midpoint( $tree )           # unweighted
233    #  $newtree = reroot_newick_to_midpoint_w( $tree )         # weight by tips
234  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted
235  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips
236  #  $newtree = uproot_tip_rooted_newick( $tree )  #  $newtree = uproot_tip_rooted_newick( $tree )
237  #  $newtree = uproot_newick( $tree )  #  $newtree = uproot_newick( $tree )
238  #  #
239  #  $newtree = prune_from_newick( $tree, $tip )  #  $newtree = prune_from_newick( $tree, $tip )
240    #  $newtree = rooted_newick_subtree( $tree,  @tips )
241    #  $newtree = rooted_newick_subtree( $tree, \@tips )
242  #  $newtree = newick_subtree( $tree,  @tips )  #  $newtree = newick_subtree( $tree,  @tips )
243  #  $newtree = newick_subtree( $tree, \@tips )  #  $newtree = newick_subtree( $tree, \@tips )
244    #  $newtree = newick_covering_subtree( $tree,  @tips )
245    #  $newtree = newick_covering_subtree( $tree, \@tips )
246  #  #
247  #  $newtree = collapse_zero_length_branches( $tree )  #  $newtree = collapse_zero_length_branches( $tree )
248  #  #
249    #  $node = newick_insert_at_node( $node, $subtree )
250    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
251    #
252    #===============================================================================
253    #  Tree neighborhood: subtree of n tips to represent a larger tree.
254    #===============================================================================
255    #
256    #  Focus around root:
257    #
258    #  $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
259    #  $subtree = root_neighborhood_representative_tree( $tree, $n )
260    #  @tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
261    #  @tips    = root_neighborhood_representative_tips( $tree, $n )
262    # \@tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
263    # \@tips    = root_neighborhood_representative_tips( $tree, $n )
264    #
265    #  Focus around a tip insertion point (the tip is not in the subtree):
266    #
267    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
268    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
269    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
270    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
271    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
272    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
273    #
274    #===============================================================================
275    #  Random trees
276    #===============================================================================
277    #
278    #   $tree = random_equibranch_tree(  @tips, \%options )
279    #   $tree = random_equibranch_tree( \@tips, \%options )
280    #   $tree = random_equibranch_tree(  @tips )
281    #   $tree = random_equibranch_tree( \@tips )
282    #
283    #   $tree = random_ultrametric_tree(  @tips, \%options )
284    #   $tree = random_ultrametric_tree( \@tips, \%options )
285    #   $tree = random_ultrametric_tree(  @tips )
286    #   $tree = random_ultrametric_tree( \@tips )
287    #
288  #===============================================================================  #===============================================================================
289  #  Tree reading and writing:  #  Tree reading and writing:
290  #===============================================================================  #===============================================================================
291    #  Write machine-readable trees:
292  #  #
293  #   writeNewickTree( $tree )  #   writeNewickTree( $tree )
294  #   writeNewickTree( $tree, $file )  #   writeNewickTree( $tree, $file )
295  #   writePrettyTree( $tree, $file )  #   writeNewickTree( $tree, \*FH )
296  #  fwriteNewickTree( $file, $tree )  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O
297  #  $treestring = swriteNewickTree( $tree )  #  $treestring = swriteNewickTree( $tree )
298  #  $treestring = formatNewickTree( $tree )  #  $treestring = formatNewickTree( $tree )
299    #
300    #  Write human-readable trees:
301    #
302  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )
303  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )
304  #  #
305    #  Read trees:
306    #
307    #  $tree  = read_newick_tree( $file )  # reads to a semicolon
308    #  @trees = read_newick_trees( $file ) # reads to end of file
309  #  $tree = parse_newick_tree_str( $string )  #  $tree = parse_newick_tree_str( $string )
310  #  #
311  #===============================================================================  #===============================================================================
312    
313    
314    use Carp;
315    use Data::Dumper;
316    use strict;
317    
318  require Exporter;  require Exporter;
319    
320  our @ISA = qw(Exporter);  our @ISA = qw(Exporter);
321  our @EXPORT = qw(  our @EXPORT = qw(
322            is_overbeek_tree
323            is_gjonewick_tree
324            overbeek_to_gjonewick
325            gjonewick_to_overbeek
326            newick_is_valid
327          newick_is_rooted          newick_is_rooted
328          newick_is_unrooted          newick_is_unrooted
329          tree_rooted_on_tip          tree_rooted_on_tip
330          newick_is_bifurcating          newick_is_bifurcating
331          newick_tip_count          newick_tip_count
332            newick_tip_ref_list
333          newick_tip_list          newick_tip_list
334    
335          newick_first_tip          newick_first_tip
336          newick_duplicated_tips          newick_duplicated_tips
337          newick_tip_in_tree          newick_tip_in_tree
338          newick_shared_tips          newick_shared_tips
339    
340          newick_tree_length          newick_tree_length
341            newick_tip_distances
342          newick_max_X          newick_max_X
343          newick_most_distant_tip_ref          newick_most_distant_tip_ref
344          newick_most_distant_tip_name          newick_most_distant_tip_name
345    
346          std_newick_name          newick_tip_insertion_point
347    
348            std_tree_name
349    
350          path_to_tip          path_to_tip
351          path_to_named_node          path_to_named_node
# Line 241  Line 366 
366          newick_set_undefined_branches          newick_set_undefined_branches
367          newick_set_all_branches          newick_set_all_branches
368          newick_fix_negative_branches          newick_fix_negative_branches
369            newick_rescale_branches
370            newick_modify_branches
371    
372            newick_strip_comments
373    
374          normalize_newick_tree          normalize_newick_tree
375          reverse_newick_tree          reverse_newick_tree
# Line 254  Line 383 
383          reroot_newick_next_to_tip          reroot_newick_next_to_tip
384          reroot_newick_to_node          reroot_newick_to_node
385          reroot_newick_to_node_ref          reroot_newick_to_node_ref
386            reroot_newick_between_nodes
387            reroot_newick_to_midpoint
388            reroot_newick_to_midpoint_w
389          reroot_newick_to_approx_midpoint          reroot_newick_to_approx_midpoint
390          reroot_newick_to_approx_midpoint_w          reroot_newick_to_approx_midpoint_w
391          uproot_tip_rooted_newick          uproot_tip_rooted_newick
392          uproot_newick          uproot_newick
393    
394          prune_from_newick          prune_from_newick
395            rooted_newick_subtree
396          newick_subtree          newick_subtree
397            newick_covering_subtree
398          collapse_zero_length_branches          collapse_zero_length_branches
399    
400            newick_insert_at_node
401            newick_insert_between_nodes
402    
403            root_neighborhood_representative_tree
404            root_neighborhood_representative_tips
405            tip_neighborhood_representative_tree
406            tip_neighborhood_representative_tips
407    
408            random_equibranch_tree
409            random_ultrametric_tree
410    
411          writeNewickTree          writeNewickTree
412          fwriteNewickTree          fwriteNewickTree
413          strNewickTree          strNewickTree
414          formatNewickTree          formatNewickTree
415    
416            read_newick_tree
417            read_newick_trees
418          parse_newick_tree_str          parse_newick_tree_str
419    
420          printer_plot_newick          printer_plot_newick
# Line 305  Line 453 
453          );          );
454    
455    
456  use gjolists qw(  #-------------------------------------------------------------------------------
457          common_prefix  #  Internally used definitions
458          common_and_unique  #-------------------------------------------------------------------------------
         unique_suffixes  
   
         unique_set  
         duplicates  
         random_order  
   
         union  
         intersection  
         set_difference  
         );  
459    
460    sub array_ref { $_[0] && ref( $_[0] ) eq 'ARRAY' }
461    sub hash_ref  { $_[0] && ref( $_[0] ) eq 'HASH'  }
462    
 use strict;  
463    
464    #===============================================================================
465    #  Interconvert overbeek and gjonewick trees:
466    #===============================================================================
467    
468  #-------------------------------------------------------------------------------  sub is_overbeek_tree { array_ref( $_[0] ) && array_ref( $_[0]->[2] ) }
469  #  Internally used definitions  
470  #-------------------------------------------------------------------------------  sub is_gjonewick_tree { array_ref( $_[0] ) && array_ref( $_[0]->[0] ) }
471    
472    sub overbeek_to_gjonewick
473    {
474        return () unless ref( $_[0] ) eq 'ARRAY';
475        my ( $lbl, $x, $desc ) = @{ $_[0] };
476        my ( undef, @desc ) = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : ();
477        [ [ map { overbeek_to_gjonewick( $_ ) } @desc ], $lbl, $x ]
478    }
479    
480  sub array_ref { ref( $_[0] ) eq "ARRAY" }  sub gjonewick_to_overbeek
481  sub hash_ref  { ref( $_[0] ) eq "HASH"  }  {
482        return () unless ref( $_[0] ) eq 'ARRAY';
483        my ( $desc, $lbl, $x ) = @{ $_[0] };
484        my @desc = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : ();
485        my $parent = $_[1];
486        my $node = [ $lbl, $x, undef, [] ];
487        $node->[2] = [ $parent, map { gjonewick_to_overbeek( $_, $node ) } @desc ];
488        return $node;
489    }
490    
491    
492  #===============================================================================  #===============================================================================
# Line 351  Line 509 
509  #  #
510  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
511    
512  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]  sub newick_desc_ref { ref($_[0]) ? $_[0]->[0] : Carp::confess() }  # = ${$_[0]}[0]
513  sub newick_lbl      { $_[0]->[1] }  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }
514  sub newick_x        { $_[0]->[2] }  sub newick_x        { ref($_[0]) ? $_[0]->[2] : Carp::confess() }
515  sub newick_c1       { $_[0]->[3] }  sub newick_c1       { ref($_[0]) ? $_[0]->[3] : Carp::confess() }
516  sub newick_c2       { $_[0]->[4] }  sub newick_c2       { ref($_[0]) ? $_[0]->[4] : Carp::confess() }
517  sub newick_c3       { $_[0]->[5] }  sub newick_c3       { ref($_[0]) ? $_[0]->[5] : Carp::confess() }
518  sub newick_c4       { $_[0]->[6] }  sub newick_c4       { ref($_[0]) ? $_[0]->[6] : Carp::confess() }
519  sub newick_c5       { $_[0]->[7] }  sub newick_c5       { ref($_[0]) ? $_[0]->[7] : Carp::confess() }
520    
521  sub newick_desc_list {  sub newick_desc_list {
522      my $node = $_[0];      my $node = $_[0];
523      ! array_ref( $node      ) ? undef           :      array_ref( $node ) && array_ref( $node->[0] ) ? @{ $node->[0] } : ();
       array_ref( $node->[0] ) ? @{ $node->[0] } :  
                                 ()              ;  
524  }  }
525    
526  sub newick_n_desc {  sub newick_n_desc {
527      my $node = $_[0];      my $node = $_[0];
528      ! array_ref( $node      ) ? undef                  :      array_ref( $node ) && array_ref( $node->[0] ) ? scalar @{ $node->[0] } : 0;
       array_ref( $node->[0] ) ? scalar @{ $node->[0] } :  
                                 0                      ;  
529  }  }
530    
531  sub newick_desc_i {  sub newick_desc_i {
532      my ( $node, $i ) = @_;      my ( $node, $i ) = @_;
533      ! array_ref( $node      ) ? undef              :      array_ref( $node ) && $i && array_ref( $node->[0] ) ? $node->[0]->[$i-1] : undef;
       array_ref( $node->[0] ) ? $node->[0]->[$i-1] :  
                                 undef              ;  
534  }  }
535    
536  sub node_is_tip {  sub node_is_tip {
# Line 427  Line 579 
579  #===============================================================================  #===============================================================================
580  #  Some tree property tests:  #  Some tree property tests:
581  #===============================================================================  #===============================================================================
582    #  Tree is valid?
583    #
584    #  $bool = newick_is_valid( $node, $verbose )
585    #-------------------------------------------------------------------------------
586    sub newick_is_valid
587    {
588        my $node = shift;
589    
590        if ( ! array_ref( $node ) )
591        {
592            print STDERR "Node is not array reference\n" if $_[0];
593            return 0;
594        }
595    
596        my @node = @$node;
597        if ( ! @node )
598        {
599            print STDERR "Node is empty array reference\n" if $_[0];
600            return 0;
601        }
602    
603        # Must have descendant or label:
604    
605        if ( ! ( array_ref( $node[0] ) && @{ $node[0] } ) && ! $node[2] )
606        {
607            print STDERR "Node has neither descendant nor label\n" if $_[0];
608            return 0;
609        }
610    
611        #  If comments are present, they must be array references
612    
613        foreach ( ( @node > 3 ) ? @node[ 3 .. $#node ] : () )
614        {
615            if ( defined( $_ ) && ! array_ref( $_ ) )
616            {
617                print STDERR "Node has neither descendant or label\n" if $_[0];
618                return 0;
619            }
620        }
621    
622        #  Inspect the descendants:
623    
624        foreach ( array_ref( $node[0] ) ? @{ $node[0] } : () )
625        {
626            newick_is_valid( $_, @_ ) || return 0
627        }
628    
629        return 1;
630    }
631    
632    
633    #-------------------------------------------------------------------------------
634  #  Tree is rooted (2 branches at root node)?  #  Tree is rooted (2 branches at root node)?
635  #  #
636  #  $bool = newick_is_rooted( $node )  #  $bool = newick_is_rooted( $node )
# Line 512  Line 716 
716  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
717  #  List of tip nodes:  #  List of tip nodes:
718  #  #
719  #  @tips = newick_tip_ref_list( $node )  #  @tips = newick_tip_ref_list( $noderef )
720    # \@tips = newick_tip_ref_list( $noderef )
721  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
722  sub newick_tip_ref_list {  sub newick_tip_ref_list {
723      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
# Line 529  Line 734 
734          push @list, newick_tip_ref_list( $_, 1 );          push @list, newick_tip_ref_list( $_, 1 );
735      }      }
736    
737      @list;      wantarray ? @list : \@list;
738  }  }
739    
740    
# Line 537  Line 742 
742  #  List of tips:  #  List of tips:
743  #  #
744  #  @tips = newick_tip_list( $node )  #  @tips = newick_tip_list( $node )
745    # \@tips = newick_tip_list( $node )
746  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
747  sub newick_tip_list {  sub newick_tip_list {
748      map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );      my @tips = map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );
749        wantarray ? @tips : \@tips;
750  }  }
751    
752    
# Line 578  Line 785 
785  #  List of duplicated tip labels.  #  List of duplicated tip labels.
786  #  #
787  #  @tips = newick_duplicated_tips( $node )  #  @tips = newick_duplicated_tips( $node )
788    # \@tips = newick_duplicated_tips( $node )
789  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
790  sub newick_duplicated_tips {  sub newick_duplicated_tips {
791      duplicates( newick_tip_list( $_[0] ) );      my @tips = &duplicates( newick_tip_list( $_[0] ) );
792        wantarray ? @tips : \@tips;
793  }  }
794    
795    
# Line 611  Line 820 
820  #  Tips shared between 2 trees.  #  Tips shared between 2 trees.
821  #  #
822  #  @tips = newick_shared_tips( $tree1, $tree2 )  #  @tips = newick_shared_tips( $tree1, $tree2 )
823    # \@tips = newick_shared_tips( $tree1, $tree2 )
824  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
825  sub newick_shared_tips {  sub newick_shared_tips {
826      my ( $Tree1, $Tree2 ) = @_;      my ( $tree1, $tree2 ) = @_;
827      my ( @Tips1 ) = newick_tip_list( $Tree1 );      my $tips1 = newick_tip_list( $tree1 );
828      my ( @Tips2 ) = newick_tip_list( $Tree2 );      my $tips2 = newick_tip_list( $tree2 );
829      intersection( \@Tips1, \@Tips2 );      my @tips = &intersection( $tips1, $tips2 );
830        wantarray ? @tips : \@tips;
831  }  }
832    
833    
# Line 638  Line 849 
849    
850    
851  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
852    #  Hash of tip nodes and corresponding distances from root:
853    #
854    #   %tip_distances = newick_tip_distances( $node )
855    #  \%tip_distances = newick_tip_distances( $node )
856    #-------------------------------------------------------------------------------
857    sub newick_tip_distances
858    {
859        my ( $node, $x, $hash ) = @_;
860        my $root = ! $hash;
861        ref( $hash ) eq 'HASH' or $hash = {};
862    
863        $x ||= 0;
864        $x  += newick_x( $node ) || 0;
865    
866        #  Is it a tip?
867    
868        my $n_desc = newick_n_desc( $node );
869        if ( ! $n_desc )
870        {
871            $hash->{ newick_lbl( $node ) } = $x;
872            return $hash;
873        }
874    
875        #  Tree rooted on tip?
876    
877        if ( ( $n_desc == 1 ) && $root && ( newick_lbl( $node ) ) )
878        {
879            $hash->{ newick_lbl( $node ) } = 0;  # Distance to root is zero
880        }
881    
882        foreach ( newick_desc_list( $node ) )
883        {
884            newick_tip_distances( $_, $x, $hash );
885        }
886    
887        wantarray ? %$hash : $hash;
888    }
889    
890    
891    #-------------------------------------------------------------------------------
892  #  Tree max X.  #  Tree max X.
893  #  #
894  #  $xmax = newick_max_X( $node )  #  $xmax = newick_max_X( $node )
# Line 712  Line 963 
963    
964    
965  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
966    #  Tree tip insertion point (with standard node labels):
967    #
968    #  [ $node1, $x1, $node2, $x2, $x ]
969    #           = newick_tip_insertion_point( $tree, $tip )
970    #
971    #  Which means: tip is on a branch of length x that is inserted into the branch
972    #  connecting node1 and node2, at distance x1 from node1 and x2 from node2.
973    #
974    #                x1    +------ n1a (lowest sorting tip of this subtree)
975    #            +--------n1
976    #            |         +------n1b (lowest sorting tip of this subtree)
977    #  tip-------n
978    #        x   |       +------------- n2a (lowest sorting tip of this subtree)
979    #            +------n2
980    #               x2   +-------- n2b (lowest sorting tip of this subtree)
981    #
982    #  The designations of 1 vs 2, and a vs b are chosen such that:
983    #     n1a < n1b, and n2a < n2b, and n1a < n2a
984    #
985    #  Then the statandard description becomes:
986    #
987    #  [ [ $n1a, min(n1b,n2a), max(n1b,n2a) ], x1,
988    #    [ $n2a, min(n2b,n1a), max(n2b,n1a) ], x2,
989    #    x
990    #  ]
991    #
992    #-------------------------------------------------------------------------------
993    sub newick_tip_insertion_point
994    {
995        my ( $tree, $tip ) = @_;
996        $tree && $tip && ref( $tree ) eq 'ARRAY'    or return undef;
997        $tree = copy_newick_tree( $tree )           or return undef;
998        $tree = reroot_newick_to_tip( $tree, $tip ) or return undef;
999        my $node = $tree;
1000    
1001        my $x  = 0;                        # Distance to node
1002        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
1003        $node  = $dl->[0];                 # Node adjacent to tip
1004        $dl    = newick_desc_ref( $node );
1005        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
1006        {
1007            $node = $dl->[0];
1008            $x   += newick_x( $node );
1009            $dl   = newick_desc_ref( $node );
1010        }
1011        $x += newick_x( $node );
1012    
1013        #  We are now at the node that is the insertion point.
1014        #  Is it a tip?
1015    
1016        my @description;
1017    
1018        if ( ( ! $dl ) || @$dl == 0 )
1019        {
1020            @description = ( [ newick_lbl( $node ) ], 0, undef, 0, $x );
1021        }
1022    
1023        #  Is it a trifurcation or greater, in which case it does not go
1024        #  away with tip deletion?
1025    
1026        elsif ( @$dl > 2 )
1027        {
1028            @description = ( [ std_node_name( $node, $node ) ], 0, undef, 0, $x );
1029        }
1030    
1031        #  The node is bifurcating.  We need to describe it.
1032    
1033        else
1034        {
1035            my ( $n1, $x1 ) = describe_descendant( $dl->[0] );
1036            my ( $n2, $x2 ) = describe_descendant( $dl->[1] );
1037    
1038            if ( @$n1 == 2 ) { push @$n1, $n2->[0] }
1039            if ( @$n2 == 2 )
1040            {
1041                @$n2 = sort { lc $a cmp lc $b } ( @$n2, $n1->[0] );
1042            }
1043            if ( @$n1 == 3 ) { @$n2 = sort { lc $a cmp lc $b } @$n2 }
1044            @description = ( $n1, $x1, $n2, $x2, $x );
1045        }
1046    
1047        return wantarray ? @description : \@description;
1048    }
1049    
1050    
1051    sub describe_descendant
1052    {
1053        my $node = shift;
1054    
1055        my $x  = 0;                        # Distance to node
1056        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
1057        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
1058        {
1059            $node = $dl->[0];
1060            $x   += newick_x( $node );
1061            $dl   = newick_desc_ref( $node );
1062        }
1063        $x += newick_x( $node );
1064    
1065        #  Is it a tip?  Return list of one tip;
1066    
1067        if ( ( ! $dl ) || @$dl == 0 )
1068        {
1069            return ( [ newick_lbl( $node ) ], $x );
1070        }
1071    
1072        #  Get tips of each descendent, keeping lowest sorting from each.
1073        #  Return the two lowest of those (the third will come from the
1074        #  other side of the original node).
1075    
1076        my @rep_tips = sort { lc $a cmp lc $b }
1077                       map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }
1078                       @$dl;
1079        return ( [ @rep_tips[0,1] ], $x );
1080    }
1081    
1082    
1083    #-------------------------------------------------------------------------------
1084  #  Standard node name:  #  Standard node name:
1085  #     Tip label if at a tip  #     Tip label if at a tip
1086  #     Three sorted tip labels intersecting at node, each being smallest  #     Three sorted tip labels intersecting at node, each being smallest
1087  #           of all the tips of their subtrees  #           of all the tips of their subtrees
1088  #  #
1089  #  @TipOrTips = std_node_name( $Tree, $Node )  #  @TipOrTips = std_node_name( $tree, $node )
1090  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1091  sub std_node_name {  sub std_node_name {
1092      my $tree = $_[0];      my $tree = $_[0];
# Line 734  Line 1103 
1103      #  Work through lists of tips in descendant subtrees, removing them from      #  Work through lists of tips in descendant subtrees, removing them from
1104      #  @rest, and keeping the best tip for each subtree.      #  @rest, and keeping the best tip for each subtree.
1105    
1106      my @rest = tips_in_newick( $tree );      my @rest = newick_tip_list( $tree );
1107      my @best = map {      my @best = map
1108              my @tips = sort { lc $a cmp lc $b } tips_in_newick( $_ );            {
1109              @rest = set_difference( \@rest, \@tips );              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );
1110                @rest = &set_difference( \@rest, \@tips );
1111              $tips[0];              $tips[0];
1112          } newick_desc_list( $noderef );          } newick_desc_list( $noderef );
1113    
# Line 825  Line 1195 
1195      my $imax = newick_n_desc( $node );      my $imax = newick_n_desc( $node );
1196      for ( my $i = 1; $i <= $imax; $i++ ) {      for ( my $i = 1; $i <= $imax; $i++ ) {
1197         @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );         @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );
1198         if ( @path ) { return @path }          return @path if @path;
1199      }      }
1200    
1201      ();  #  Not found      ();  #  Not found
# Line 856  Line 1226 
1226      @p2 && @p3 || return ();                             #  Were they found?      @p2 && @p3 || return ();                             #  Were they found?
1227    
1228      # Find the common prefix for each pair of paths      # Find the common prefix for each pair of paths
1229      my @p12 = common_prefix( \@p1, \@p2 );      my @p12 = &common_prefix( \@p1, \@p2 );
1230      my @p13 = common_prefix( \@p1, \@p3 );      my @p13 = &common_prefix( \@p1, \@p3 );
1231      my @p23 = common_prefix( \@p2, \@p3 );      my @p23 = &common_prefix( \@p2, \@p3 );
1232    
1233      # Return the longest common prefix of any two paths      # Return the longest common prefix of any two paths
1234      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :
# Line 909  Line 1279 
1279      @p1 && @p2 || return undef;                          # Were they found?      @p1 && @p2 || return undef;                          # Were they found?
1280    
1281      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1282      my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1283      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1284      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1285    
# Line 930  Line 1300 
1300    
1301      array_ref( $node ) && defined( $node1 )      array_ref( $node ) && defined( $node1 )
1302                         && defined( $node2 ) || return undef;                         && defined( $node2 ) || return undef;
1303      my @p1 = path_to_node( $node, $node1 );      my @p1 = path_to_node( $node, $node1 ) or return undef;
1304      my @p2 = path_to_node( $node, $node2 );      my @p2 = path_to_node( $node, $node2 ) or return undef;
     @p1 && @p2 || return undef;                          # Were they found?  
1305    
1306      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1307      my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1308      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1309      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1310    
# Line 1150  Line 1519 
1519      my ( $node, $x, $not_root ) = @_;      my ( $node, $x, $not_root ) = @_;
1520    
1521      my $n = 0;      my $n = 0;
1522      if ( $not_root ) {      if ( $not_root )
1523        {
1524          set_newick_x( $node, $x );          set_newick_x( $node, $x );
1525          $n++;          $n++;
1526      }      }
1527    
1528      foreach ( newick_desc_list( $node ) ) {      foreach ( newick_desc_list( $node ) )
1529        {
1530          $n += newick_set_all_branches( $_, $x, 1 );          $n += newick_set_all_branches( $_, $x, 1 );
1531      }      }
1532    
# Line 1164  Line 1535 
1535    
1536    
1537  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1538    #  Rescale all branch lenghts by factor.
1539    #
1540    #  $node = newick_rescale_branches( $node, $factor )
1541    #-------------------------------------------------------------------------------
1542    sub newick_rescale_branches {
1543        my ( $node, $factor ) = @_;
1544    
1545        my $x = newick_x( $node );
1546        set_newick_x( $node, $factor * $x ) if $x;
1547    
1548        foreach ( newick_desc_list( $node ) )
1549        {
1550            newick_rescale_branches( $_, $factor );
1551        }
1552    
1553        $node;
1554    }
1555    
1556    
1557    #-------------------------------------------------------------------------------
1558    #  Modify all branch lengths by a function.
1559    #
1560    #     $node = newick_modify_branches( $node, \&function )
1561    #     $node = newick_modify_branches( $node, \&function, \@func_parms )
1562    #
1563    #  Function must have form
1564    #
1565    #     $x2 = &$function( $x1 )
1566    #     $x2 = &$function( $x1, @$func_parms )
1567    #
1568    #-------------------------------------------------------------------------------
1569    sub newick_modify_branches {
1570        my ( $node, $func, $parm ) = @_;
1571    
1572        set_newick_x( $node, &$func( newick_x( $node ), ( $parm ? @$parm : () ) ) );
1573        foreach ( newick_desc_list( $node ) )
1574        {
1575            newick_modify_branches( $_, $func, $parm )
1576        }
1577    
1578        $node;
1579    }
1580    
1581    
1582    #-------------------------------------------------------------------------------
1583  #  Set negative branches to zero.  The original tree is modfied.  #  Set negative branches to zero.  The original tree is modfied.
1584  #  #
1585  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
# Line 1189  Line 1605 
1605    
1606    
1607  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1608    #  Remove comments from a newick tree (e.g., before writing for phylip).
1609    #
1610    #  $node = newick_strip_comments( $node )
1611    #-------------------------------------------------------------------------------
1612    sub newick_strip_comments {
1613        my ( $node ) = @_;
1614    
1615        @$node = @$node[ 0 .. 2 ];
1616        foreach ( newick_desc_list( $node ) ) { newick_strip_comments( $_ ) }
1617        $node;
1618    }
1619    
1620    
1621    #-------------------------------------------------------------------------------
1622  #  Normalize tree order (in place).  #  Normalize tree order (in place).
1623  #  #
1624  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )
# Line 1238  Line 1668 
1668    
1669    
1670  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1671    #  Standard name for a Newick tree topology
1672    #
1673    #    $stdname = std_tree_name( $tree )
1674    #
1675    #-------------------------------------------------------------------------------
1676    sub std_tree_name
1677    {
1678        my ( $tree ) = @_;
1679        my ( $mintip ) = sort { lc $a cmp lc $b } newick_tip_list( $tree );
1680        ( std_tree_name_2( reroot_newick_next_to_tip( copy_newick_tree( $tree ), $mintip ) ) )[0];
1681    }
1682    
1683    
1684    #
1685    #  ( $name, $mintip ) = std_tree_name_2( $node )
1686    #
1687    sub std_tree_name_2
1688    {
1689        my ( $node ) = @_;
1690    
1691        my @descends = newick_desc_list( $node );
1692        if ( @descends == 0 )
1693        {
1694            my $lbl = newick_lbl( $node );
1695            return ( $lbl, $lbl );
1696        }
1697    
1698        my @list = sort { lc $a->[1] cmp lc $b->[1] || $a->[1] cmp $b->[1] }
1699                   map  { [ std_tree_name_2( $_ ) ] }
1700                   @descends;
1701        my $mintip = $list[0]->[1];
1702        my $name   = '(' . join( "\t", map { $_->[0] } @list ) . ')';
1703    
1704        return ( $name, $mintip );
1705    }
1706    
1707    
1708    #-------------------------------------------------------------------------------
1709  #  Move largest groups to periphery of tree (in place).  #  Move largest groups to periphery of tree (in place).
1710  #  #
1711  #      dir  <= -2 for up-sweeping tree (big groups always first),  #      dir  <= -2 for up-sweeping tree (big groups always first),
# Line 1297  Line 1765 
1765      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
1766      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip
1767    
     #  Reorder this subtree:  
   
1768      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1769      if    ( $dir < 0 ) {                   #  Big group first  
1770          @$dl_ref = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;      #  Reorder this subtree (biggest subtrees to outside)
1771    
1772        if ( $dir )
1773        {
1774            #  Big group first
1775            my @dl = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;
1776    
1777            my ( @dl1, @dl2 );
1778            for ( my $i = 0; $i < $nd; $i++ ) {
1779                if ( $i & 1 ) { push @dl2, $dl[$i] } else { push @dl1, $dl[$i] }
1780      }      }
1781      elsif ( $dir > 0 ) {                   #  Small group first  
1782          @$dl_ref = sort { $cntref->{$a} <=> $cntref->{$b} } @$dl_ref;          @$dl_ref = ( $dir < 0 ) ? ( @dl1, reverse @dl2 )
1783                                    : ( @dl2, reverse @dl1 );
1784      }      }
1785    
1786      #  Reorder within descendant subtrees:      #  Reorder within descendant subtrees:
# Line 1336  Line 1812 
1812  #  #
1813  #  $tree = unaesthetic_newick_tree( $treeref, $dir )  #  $tree = unaesthetic_newick_tree( $treeref, $dir )
1814  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1815  sub unaesthetic_newick_tree {  sub unaesthetic_newick_tree
1816    {
1817      my ( $tree, $dir ) = @_;      my ( $tree, $dir ) = @_;
1818      my %cnt;      my %cnt;
1819    
# Line 1356  Line 1833 
1833  #           = 0 for no change, and  #           = 0 for no change, and
1834  #           > 0 for downward branch (small group first).  #           > 0 for downward branch (small group first).
1835  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1836  sub reorder_against_tip_count {  sub reorder_against_tip_count
1837    {
1838      my ( $node, $cntref, $dir ) = @_;      my ( $node, $cntref, $dir ) = @_;
1839    
1840      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1395  Line 1873 
1873  #  #
1874  #  $tree = random_order_newick_tree( $tree )  #  $tree = random_order_newick_tree( $tree )
1875  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1876  sub random_order_newick_tree {  sub random_order_newick_tree
1877    {
1878      my ( $node ) = @_;      my ( $node ) = @_;
1879    
1880      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1404  Line 1883 
1883      #  Reorder this subtree:      #  Reorder this subtree:
1884    
1885      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1886      @$dl_ref = random_order( @$dl_ref );      @$dl_ref = &random_order( @$dl_ref );
1887    
1888      #  Reorder descendants:      #  Reorder descendants:
1889    
# Line 1419  Line 1898 
1898  #  #
1899  #  $newtree = reroot_newick_by_path( @path )  #  $newtree = reroot_newick_by_path( @path )
1900  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1901  sub reroot_newick_by_path {  sub reroot_newick_by_path
1902    {
1903      my ( $node1, $path1, @rest ) = @_;      my ( $node1, $path1, @rest ) = @_;
1904      array_ref( $node1 ) || return undef;      #  Always expect a node      array_ref( $node1 ) || return undef;      #  Always expect a node
1905    
# Line 1432  Line 1912 
1912      #      #
1913      #      splice( @$dl1, $path1-1, 1 );      #      splice( @$dl1, $path1-1, 1 );
1914      #      #
1915      #  But this maintains the cyclic order of the nodes:      #  But the following maintains the cyclic order of the nodes:
1916    
1917      my $dl1 = newick_desc_ref( $node1 );      my $dl1 = newick_desc_ref( $node1 );
1918      my $nd1 = @$dl1;      my $nd1 = @$dl1;
# Line 1447  Line 1927 
1927    
1928      my $dl2 = newick_desc_ref( $node2 );      my $dl2 = newick_desc_ref( $node2 );
1929      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }
1930      else                     { set_newick_desc_list( $node2, [ $node1 ] ) }      else                     { set_newick_desc_list( $node2, $node1 ) }
1931    
1932      #  Move c1 comments from node 1 to node 2:      #  Move c1 comments from node 1 to node 2:
1933    
# Line 1527  Line 2007 
2007    
2008    
2009  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2010  #  Move root of tree to an approximate midpoint.  #  Reroot a newick tree along the path between 2 nodes:
2011  #  #
2012  #  $newtree = reroot_newick_to_approx_midpoint( $tree )  #  $tree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
2013  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2014  sub reroot_newick_to_approx_midpoint {  sub reroot_newick_between_nodes
2015      my ( $tree ) = @_;  {
2016        my ( $tree, $node1, $node2, $fraction ) = @_;
2017        array_ref( $tree ) or return undef;
2018        $fraction >= 0 && $fraction <= 1 or return undef;
2019    
2020      #  Compile average tip to node distances assending      #  Find the paths to the nodes:
2021    
2022      my $dists1 = average_to_tips_1( $tree );      my @path1 = path_to_node( $tree, $node1 ) or return undef;
2023        my @path2 = path_to_node( $tree, $node2 ) or return undef;
2024    
2025      #  Compile average tip to node distances descending, returning midpoint node      reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction )
2026    }
2027    
2028    
2029    #-------------------------------------------------------------------------------
2030    #  Reroot a newick tree along the path between 2 nodes:
2031    #
2032    #  $tree = reroot_newick_between_node_refs( $tree, $node1, $node2, $fraction )
2033    #-------------------------------------------------------------------------------
2034    sub reroot_newick_between_node_refs
2035    {
2036        my ( $tree, $node1, $node2, $fraction ) = @_;
2037        array_ref( $tree ) or return undef;
2038    
2039      my $node = average_to_tips_2( $dists1, undef, undef );      #  Find the paths to the nodes:
2040    
2041      #  Reroot      my @path1 = path_to_node_ref( $tree, $node1 ) or return undef;
2042        my @path2 = path_to_node_ref( $tree, $node2 ) or return undef;;
2043    
2044      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree      reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction )
2045  }  }
2046    
2047    
2048    #-------------------------------------------------------------------------------
2049    #  Reroot a newick tree along the path between 2 nodes defined by paths:
2050    #
2051    #  $tree = reroot_newick_between_nodes_by_path( $tree, $path1, $path2, $fraction )
2052    #-------------------------------------------------------------------------------
2053    sub reroot_newick_between_nodes_by_path
2054    {
2055        my ( $tree, $path1, $path2, $fraction ) = @_;
2056        array_ref( $tree ) and array_ref( $path1 ) and  array_ref( $path2 )
2057           or return undef;
2058        $fraction >= 0 && $fraction <= 1 or return undef;
2059    
2060        my @path1 = @$path1;
2061        my @path2 = @$path2;
2062    
2063        #  Trim the common prefix, saving it:
2064    
2065        my @prefix = ();
2066        while ( defined( $path1[1] ) && defined( $path2[1] ) && ( $path1[1] == $path2[1] ) )
2067        {
2068            push @prefix, splice( @path1, 0, 2 );
2069            splice( @path2, 0, 2 );
2070        }
2071    
2072        my ( @path, $dist );
2073        if    ( @path1 < 3 )
2074        {
2075            @path2 >= 3 or return undef;              # node1 = node2
2076            $dist = $fraction * newick_path_length( @path2 );
2077            @path = @path2;
2078        }
2079        elsif ( @path2 < 3 )
2080        {
2081            $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
2082            @path = @path1;
2083        }
2084        else
2085        {
2086            my $dist1 = newick_path_length( @path1 );
2087            my $dist2 = newick_path_length( @path2 );
2088            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2089            @path = ( $dist <= 0 ) ? @path1 : @path2;
2090            $dist = abs( $dist );
2091        }
2092    
2093        #  Descend tree until we reach the insertion branch:
2094    
2095        my $x;
2096        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
2097        {
2098            $dist -= $x;
2099            push @prefix, splice( @path, 0, 2 );
2100        }
2101    
2102        #  Insert the new node:
2103    
2104        my $newnode = [ [ $path[2] ], undef, $dist ];
2105        set_newick_desc_i( $path[0], $path[1], $newnode );
2106        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
2107    
2108        #  We can now build the path from root to the new node
2109    
2110        reroot_newick_by_path( @prefix, @path[0,1], $newnode );
2111    }
2112    
2113    
2114    #-------------------------------------------------------------------------------
2115    #  Move root of tree to an approximate midpoint.
2116    #
2117    #  $newtree = reroot_newick_to_approx_midpoint( $tree )
2118    #-------------------------------------------------------------------------------
2119    sub reroot_newick_to_approx_midpoint {
2120        my ( $tree ) = @_;
2121    
2122        #  Compile average tip to node distances assending
2123    
2124        my $dists1 = average_to_tips_1( $tree );
2125    
2126        #  Compile average tip to node distances descending, returning midpoint
2127        #  cadidates as a list of [ $node1, $node2, $fraction ]
2128    
2129        my @mids = average_to_tips_2( $dists1, undef, undef );
2130    
2131        #  Reroot to first midpoint candidate
2132    
2133        return $tree if ! @mids;
2134        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2135        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2136    }
2137    
2138    
2139    #-------------------------------------------------------------------------------
2140    #  Move root of tree to a midpoint.
2141    #
2142    #  $newtree = reroot_newick_to_midpoint( $tree )
2143    #-------------------------------------------------------------------------------
2144    sub reroot_newick_to_midpoint {
2145        my ( $tree ) = @_;
2146    
2147        #  Compile average tip to node distances assending
2148    
2149        my $dists1 = average_to_tips_1( $tree );
2150    
2151        #  Compile average tip to node distances descending, returning midpoint
2152        #  [ $node1, $node2, $fraction ]
2153    
2154        my @mids = average_to_tips_2( $dists1, undef, undef );
2155    
2156        @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2157    }
2158    
2159    
2160    #-------------------------------------------------------------------------------
2161    #  Compile average tip to node distances assending
2162    #-------------------------------------------------------------------------------
2163  sub average_to_tips_1 {  sub average_to_tips_1 {
2164      my ( $node ) = @_;      my ( $node ) = @_;
2165    
# Line 1558  Line 2170 
2170          foreach ( @desc_dists ) { $x_below += $_->[0] }          foreach ( @desc_dists ) { $x_below += $_->[0] }
2171          $x_below /= @desc_dists;          $x_below /= @desc_dists;
2172      }      }
2173    
2174      my $x = newick_x( $node ) || 0;      my $x = newick_x( $node ) || 0;
2175      my $x_net = $x_below + $x;      my $x_net = $x_below + $x;
2176    
# Line 1565  Line 2178 
2178  }  }
2179    
2180    
2181    #-------------------------------------------------------------------------------
2182    #  Compile average tip to node distances descending, returning midpoint as
2183    #  [ $node1, $node2, $fraction_of_dist_between ]
2184    #-------------------------------------------------------------------------------
2185  sub average_to_tips_2 {  sub average_to_tips_2 {
2186      my ( $dists1, $x_above, $anc_node ) = @_;      my ( $dists1, $x_above, $anc_node ) = @_;
2187      my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;      my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;
2188    
2189      #  Are we done?  Root is in this node's branch, or "above"?      #  Are we done?  Root is in this node's branch, or "above"?
2190    
2191      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      my @mids = ();
     # print STDERR "x       = $x\n";  
     # print STDERR "x_below = $x_below\n";  
     # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";  
   
2192      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2193      {      {
2194          #  At this point the root can only be in this node's branch,          #  At this point the root can only be in this node's branch,
# Line 1587  Line 2200 
2200    
2201          if ( ( $x_below + $x ) >= $x_above )          if ( ( $x_below + $x ) >= $x_above )
2202          {          {
2203              return ( $x_above >= $x_below ) ? $anc_node : $node;              #  We will need to make a new node for the root, $fract of
2204          }              #  the way from $node to $anc_node:
2205          else              my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2206          {                                     : 0.5;
2207              return undef;              push @mids, [ $node, $anc_node, $fract ];
2208          }          }
2209      }      }
2210    
2211      #  The root must be somewhere below this node:      #  The root might be somewhere below this node:
2212    
2213      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );
2214      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );
# Line 1605  Line 2218 
2218          #  If input tree is tip_rooted, $n-1 can be 0, so:          #  If input tree is tip_rooted, $n-1 can be 0, so:
2219    
2220          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;
2221          my $root = average_to_tips_2( $_, $above2, $node );          push @mids, average_to_tips_2( $_, $above2, $node );
         if ( $root ) { return $root }  
2222      }      }
2223    
2224      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2225  }  }
2226    
2227    
# Line 1622  Line 2232 
2232  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2233  sub reroot_newick_to_approx_midpoint_w {  sub reroot_newick_to_approx_midpoint_w {
2234      my ( $tree ) = @_;      my ( $tree ) = @_;
2235        array_ref( $tree ) or return undef;
2236    
2237        #  Compile average tip to node distances assending from tips
2238    
2239        my $dists1 = average_to_tips_1_w( $tree );
2240    
2241        #  Compile average tip to node distances descending, returning midpoints
2242    
2243        my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2244    
2245        #  Reroot to first midpoint candidate
2246    
2247        return $tree if ! @mids;
2248        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2249        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2250    }
2251    
2252    
2253    #-------------------------------------------------------------------------------
2254    #  Move root of tree to an approximate midpoint.  Weight by tips.
2255    #
2256    #  $newtree = reroot_newick_to_midpoint_w( $tree )
2257    #-------------------------------------------------------------------------------
2258    sub reroot_newick_to_midpoint_w {
2259        my ( $tree ) = @_;
2260        array_ref( $tree ) or return ();
2261    
2262      #  Compile average tip to node distances assending      #  Compile average tip to node distances assending
2263    
# Line 1629  Line 2265 
2265    
2266      #  Compile average tip to node distances descending, returning midpoint node      #  Compile average tip to node distances descending, returning midpoint node
2267    
2268      my $node = average_to_tips_2_w( $dists1, undef, undef, undef );      my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2269    
2270      #  Reroot      #  Reroot at first candidate midpoint
2271    
2272      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree      @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2273  }  }
2274    
2275    
2276    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2277  sub average_to_tips_1_w {  sub average_to_tips_1_w {
2278      my ( $node ) = @_;      my ( $node ) = @_;
2279    
# Line 1654  Line 2291 
2291          }          }
2292          $x_below /= $n_below;          $x_below /= $n_below;
2293      }      }
2294    
2295      my $x = newick_x( $node ) || 0;      my $x = newick_x( $node ) || 0;
2296      my $x_net = $x_below + $x;      my $x_net = $x_below + $x;
2297    
# Line 1661  Line 2299 
2299  }  }
2300    
2301    
2302    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2303  sub average_to_tips_2_w {  sub average_to_tips_2_w {
2304      my ( $dists1, $x_above, $n_above, $anc_node ) = @_;      my ( $dists1, $x_above, $n_above, $anc_node ) = @_;
2305      my ( undef, $n_below, $x, $x_below, $desc_list, $node ) = @$dists1;      my ( undef, $n_below, $x, $x_below, $desc_list, $node ) = @$dists1;
2306    
2307      #  Are we done?  Root is in this node's branch, or "above"?      #  Are we done?  Root is in this node's branch, or "above"?
2308    
2309      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      my @mids = ();
     # print STDERR "x       = $x\n";  
     # print STDERR "x_below = $x_below\n";  
     # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";  
   
2310      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2311      {      {
2312          #  At this point the root can only be in this node's branch,          #  At this point the root can only be in this node's branch,
# Line 1679  Line 2314 
2314          #  would mean that the midpoint is actually down a different          #  would mean that the midpoint is actually down a different
2315          #  path from the root of the current tree).          #  path from the root of the current tree).
2316          #          #
2317          #  Is the root in the current branch?          #  Is their a root in the current branch?
2318    
2319          if ( ( $x_below + $x ) >= $x_above )          if ( ( $x_below + $x ) >= $x_above )
2320          {          {
2321              return ( $x_above >= $x_below ) ? $anc_node : $node;              #  We will need to make a new node for the root, $fract of
2322          }              #  the way from $node to $anc_node:
2323          else              my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2324          {                                     : 0.5;
2325              return undef;              push @mids, [ $node, $anc_node, $fract ];
2326          }          }
2327      }      }
2328    
# Line 1707  Line 2342 
2342    
2343          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )
2344                                   : 0;                                   : 0;
2345          my $root = average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );          push @mids, average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );
         if ( $root ) { return $root }  
2346      }      }
2347    
2348      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2349  }  }
2350    
2351    
# Line 1859  Line 2491 
2491      ( $tree, ( $collapse ? @new_desc : () ) );      ( $tree, ( $collapse ? @new_desc : () ) );
2492  }  }
2493    
2494    #-------------------------------------------------------------------------------
2495    #  Add a subtree to a newick tree node:
2496    #
2497    #  $node = newick_insert_at_node( $node, $subtree )
2498    #-------------------------------------------------------------------------------
2499    sub newick_insert_at_node
2500    {
2501        my ( $node, $subtree ) = @_;
2502        array_ref( $node ) && array_ref( $subtree ) or return undef;
2503    
2504        #  We could check validity of trees, but ....
2505    
2506        my $dl = newick_desc_ref( $node );
2507        if ( array_ref( $dl ) )
2508        {
2509            push @$dl, $subtree;
2510        }
2511        else
2512        {
2513            set_newick_desc_ref( $node, [ $subtree ] );
2514        }
2515        return $node;
2516    }
2517    
2518    
2519    #-------------------------------------------------------------------------------
2520    #  Insert a subtree into a newick tree along the path between 2 nodes:
2521    #
2522    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
2523    #-------------------------------------------------------------------------------
2524    sub newick_insert_between_nodes
2525    {
2526        my ( $tree, $subtree, $node1, $node2, $fraction ) = @_;
2527        array_ref( $tree ) && array_ref( $subtree ) or return undef;
2528        $fraction >= 0 && $fraction <= 1 or return undef;
2529    
2530        #  Find the paths to the nodes:
2531    
2532        my @path1 = path_to_node( $tree, $node1 ) or return undef;
2533        my @path2 = path_to_node( $tree, $node2 ) or return undef;
2534    
2535        #  Trim the common prefix:
2536    
2537        while ( $path1[1] == $path2[1] )
2538        {
2539            splice( @path1, 0, 2 );
2540            splice( @path2, 0, 2 );
2541        }
2542    
2543        my ( @path, $dist );
2544        if    ( @path1 < 3 )
2545        {
2546            @path2 >= 3 or return undef;              # node1 = node2
2547            $dist = $fraction * newick_path_length( @path2 );
2548            @path = @path2;
2549        }
2550        elsif ( @path2 < 3 )
2551        {
2552            $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
2553            @path = @path1;
2554        }
2555        else
2556        {
2557            my $dist1 = newick_path_length( @path1 );
2558            my $dist2 = newick_path_length( @path2 );
2559            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2560            @path = ( $dist <= 0 ) ? @path1 : @path2;
2561            $dist = abs( $dist );
2562        }
2563    
2564        #  Descend tree until we reach the insertion branch:
2565    
2566        my $x;
2567        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
2568        {
2569            $dist -= $x;
2570            splice( @path, 0, 2 );
2571        }
2572    
2573        #  Insert the new node:
2574    
2575        set_newick_desc_i( $path[0], $path[1], [ [ $path[2], $subtree ], undef, $dist ] );
2576        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
2577    
2578        return $tree;
2579    }
2580    
2581    
2582  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2583  #  Prune one or more tips from a tree:  #  Prune one or more tips from a tree:
# Line 1941  Line 2660 
2660    
2661    
2662  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2663    #  Produce a potentially rooted subtree with the desired tips:
2664    #
2665    #     Except for (some) tip nodes, the tree produced is a copy.
2666    #     There is no check that requested tips exist.
2667    #
2668    #  $newtree = rooted_newick_subtree( $tree,  @tips )
2669    #  $newtree = rooted_newick_subtree( $tree, \@tips )
2670    #-------------------------------------------------------------------------------
2671    sub rooted_newick_subtree {
2672        my ( $tr, @tips ) = @_;
2673        if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }
2674    
2675        if ( @tips < 2 ) { return undef }
2676        my $keephash = { map { ( $_, 1 ) } @tips };
2677        my $tr2 = subtree1( $tr, $keephash );
2678        $tr2->[2] = undef if $tr2;                   # undef root branch length
2679        $tr2;
2680    }
2681    
2682    
2683    #-------------------------------------------------------------------------------
2684  #  Produce a subtree with the desired tips:  #  Produce a subtree with the desired tips:
2685  #  #
2686  #     Except for (some) tip nodes, the tree produced is a copy.  #     Except for (some) tip nodes, the tree produced is a copy.
# Line 2014  Line 2754 
2754  }  }
2755    
2756    
2757    #-------------------------------------------------------------------------------
2758    #  The smallest subtree of rooted tree that includes @tips:
2759    #
2760    #    $node = newick_covering_subtree( $tree,  @tips )
2761    #    $node = newick_covering_subtree( $tree, \@tips )
2762    #-------------------------------------------------------------------------------
2763    
2764    sub newick_covering_subtree {
2765        my $tree = shift;
2766        my %tips = map { $_ => 1 } ( ( ref( $_[0] ) eq 'ARRAY' ) ? @{ $_[0] } : @_ );
2767    
2768        #  Return smallest covering node, if any:
2769    
2770        ( newick_covering_subtree( $tree, \%tips ) )[ 0 ];
2771    }
2772    
2773    
2774    sub newick_covering_subtree_1 {
2775        my ( $node, $tips ) = @_;
2776        my $n_cover = 0;
2777        my @desc = newick_desc_list( $node );
2778        if ( @desc )
2779        {
2780            foreach ( @desc )
2781            {
2782                my ( $subtree, $n ) = newick_covering_subtree_1( $_, $tips );
2783                return ( $subtree, $n ) if $subtree;
2784                $n_cover += $n;
2785            }
2786        }
2787        elsif ( $tips->{ newick_lbl( $node ) } )
2788        {
2789            $n_cover++;
2790        }
2791    
2792        #  If all tips are covered, return node
2793    
2794        ( $n_cover == keys %$tips ) ? ( $node, $n_cover ) : ( undef, $n_cover );
2795    }
2796    
2797    
2798    #===============================================================================
2799    #
2800    #  Representative subtrees
2801    #
2802    #===============================================================================
2803    #  Find subtree of size n representating vicinity of the root:
2804    #
2805    #   $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
2806    #   $subtree = root_neighborhood_representative_tree( $tree, $n )
2807    #
2808    #  Note that if $tree is rooted, then the subtree will also be.  This can have
2809    #  consequences on downstream programs.
2810    #-------------------------------------------------------------------------------
2811    sub root_neighborhood_representative_tree
2812    {
2813        my ( $tree, $n, $tip_priority ) = @_;
2814        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2815        if ( newick_tip_count( $tree ) <= $n ) { return $tree }
2816    
2817        $tip_priority ||= default_tip_priority( $tree );
2818        my @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2819                   root_proximal_newick_subtrees( $tree, $n );
2820    
2821        newick_subtree( copy_newick_tree( $tree ), \@tips );
2822    }
2823    
2824    
2825    #-------------------------------------------------------------------------------
2826    #  Find n tips to represent tree lineages in vicinity of another tip.
2827    #  Default tip priority is short total branch length.
2828    #
2829    #  \@tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2830    #   @tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2831    #  \@tips = root_neighborhood_representative_tips( $tree, $n )
2832    #   @tips = root_neighborhood_representative_tips( $tree, $n )
2833    #-------------------------------------------------------------------------------
2834    sub root_neighborhood_representative_tips
2835    {
2836        my ( $tree, $n, $tip_priority ) = @_;
2837        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2838    
2839        my @tips;
2840        if ( newick_tip_count( $tree ) <= $n )
2841        {
2842            @tips = newick_tip_list( $tree );
2843        }
2844        else
2845        {
2846            $tip_priority ||= default_tip_priority( $tree );
2847            @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2848                    root_proximal_newick_subtrees( $tree, $n );
2849        }
2850    
2851        wantarray ? @tips : \@tips;
2852    }
2853    
2854    
2855    #-------------------------------------------------------------------------------
2856    #  Find subtree of size n representating vicinity of a tip:
2857    #
2858    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
2859    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
2860    #-------------------------------------------------------------------------------
2861    sub tip_neighborhood_representative_tree
2862    {
2863        my ( $tree, $tip, $n, $tip_priority ) = @_;
2864        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2865        newick_tip_in_tree( $tree, $tip ) or return undef;
2866    
2867        my $tree1 = copy_newick_tree( $tree );
2868        if ( newick_tip_count( $tree1 ) - 1 <= $n )
2869        {
2870            return prune_from_newick( $tree1, $tip )
2871        }
2872    
2873        $tree1 = reroot_newick_to_tip( $tree1, $tip );
2874        $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2875        my @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2876        newick_subtree( copy_newick_tree( $tree ), \@tips );
2877    }
2878    
2879    
2880    #-------------------------------------------------------------------------------
2881    #  Find n tips to represent tree lineages in vicinity of another tip.
2882    #  Default tip priority is short total branch length.
2883    #
2884    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2885    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2886    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2887    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2888    #-------------------------------------------------------------------------------
2889    sub tip_neighborhood_representative_tips
2890    {
2891        my ( $tree, $tip, $n, $tip_priority ) = @_;
2892        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2893        newick_tip_in_tree( $tree, $tip ) or return undef;
2894    
2895        my @tips = newick_tip_list( $tree );
2896        if ( newick_tip_count( $tree ) - 1 <= $n )
2897        {
2898            @tips = grep { $_ ne $tip } @tips;
2899        }
2900        else
2901        {
2902            my $tree1 = copy_newick_tree( $tree );
2903            $tree1 = reroot_newick_to_tip( $tree1, $tip );
2904            $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2905            @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2906        }
2907    
2908        wantarray ? @tips : \@tips;
2909    }
2910    
2911    
2912    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2913    #  Anonymous hash of the negative distance from root to each tip:
2914    #
2915    #   \%tip_priority = default_tip_priority( $tree )
2916    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2917    sub default_tip_priority
2918    {
2919        my ( $tree ) = @_;
2920        my $tip_distances = newick_tip_distances( $tree ) || {};
2921        return { map { $_ => -$tip_distances->{$_} } keys %$tip_distances };
2922    }
2923    
2924    
2925    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2926    #  Select a tip from a subtree base on a priority value:
2927    #
2928    #    $tip = representative_tip_of_newick_node( $node, \%tip_priority )
2929    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2930    sub representative_tip_of_newick_node
2931    {
2932        my ( $node, $tip_priority ) = @_;
2933        my ( $tip ) = sort { $b->[1] <=> $a->[1] }   # The best
2934                      map  { [ $_, $tip_priority->{ $_ } ] }
2935                      newick_tip_list( $node );
2936        $tip->[0];                                   # Label from label-priority pair
2937    }
2938    
2939    
2940    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2941    #  Find n subtrees focused around the root of a tree.  Typically each will
2942    #  then be reduced to a single tip to make a representative tree:
2943    #
2944    #   @subtrees = root_proximal_newick_subtrees( $tree, $n )
2945    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2946    sub root_proximal_newick_subtrees
2947    {
2948        my ( $tree, $n ) = @_;
2949        my $node_start_end = newick_branch_intervals( $tree );
2950        n_representative_branches( $n, $node_start_end );
2951    }
2952    
2953    
2954    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2955    #   @node_start_end = newick_branch_intervals( $tree )
2956    #  \@node_start_end = newick_branch_intervals( $tree )
2957    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2958    sub newick_branch_intervals
2959    {
2960        my ( $node, $parent_x ) = @_;
2961        $parent_x ||= 0;
2962        my ( $desc, undef, $dx ) = @$node;
2963        my $x = $parent_x + $dx;
2964        my $interval = [ $node, $parent_x, $desc && @$desc ? $x : 1e100 ];
2965        my @intervals = ( $interval,
2966                          map { &newick_branch_intervals( $_, $x ) } @$desc
2967                        );
2968        return wantarray ? @intervals : \@intervals;
2969    }
2970    
2971    
2972    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2973    #   @ids = n_representative_branches( $n,  @id_start_end )
2974    #   @ids = n_representative_branches( $n, \@id_start_end )
2975    #  \@ids = n_representative_branches( $n,  @id_start_end )
2976    #  \@ids = n_representative_branches( $n, \@id_start_end )
2977    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2978    sub n_representative_branches
2979    {
2980        my $n = shift;
2981        #  Sort intervals by start point:
2982        my @unprocessed = sort { $a->[1] <=> $b->[1] }
2983                          ( @_ == 1 ) ? @{ $_[0] } : @_;
2984        my @active = ();
2985        my ( $interval, $current_point );
2986        foreach $interval ( @unprocessed )
2987        {
2988            $current_point = $interval->[1];
2989            #  Filter out intervals that have ended.  This is N**2 in the number
2990            #  of representatives.  Fixing this would require maintaining a sorted
2991            #  active list.
2992            @active = grep { $_->[2] > $current_point } @active;
2993            push @active, $interval;
2994            last if ( @active >= $n );
2995        }
2996    
2997        my @ids = map { $_->[0] } @active;
2998        return wantarray() ? @ids : \@ids;
2999    }
3000    
3001    
3002    #===============================================================================
3003    #  Random trees
3004    #===============================================================================
3005    #
3006    #   $tree = random_equibranch_tree(  @tips, \%options )
3007    #   $tree = random_equibranch_tree( \@tips, \%options )
3008    #   $tree = random_equibranch_tree(  @tips )
3009    #   $tree = random_equibranch_tree( \@tips )
3010    #
3011    #  Options:
3012    #
3013    #     length => $branch_length   # D = 1
3014    #
3015    #-------------------------------------------------------------------------------
3016    sub random_equibranch_tree
3017    {
3018        my $opts = $_[ 0] && ref $_[ 0] eq 'HASH' ? shift
3019                 : $_[-1] && ref $_[-1] eq 'HASH' ? pop
3020                 :                                  {};
3021        return undef if ! defined $_[0];
3022    
3023        my @tips = ref $_[0] ? @{ $_[0] } : @_;
3024        return undef if @tips < 2;
3025    
3026        my $len = $opts->{ length } ||= 1;
3027    
3028        if ( @tips == 2 )
3029        {
3030            return [ [ map { [ [], $_, $len ] } @tips ], undef, 0 ];
3031        }
3032    
3033        my $tree = [ [ ], undef, 0 ];
3034    
3035        my @links;  # \$anc_dl[i], i.e. a reference to an element in a descendent list
3036    
3037        my $anc_dl = $tree->[0];
3038        foreach my $tip ( splice( @tips, 0, 3 ) )
3039        {
3040            my $node = [ [], $tip, $len ];
3041            push @$anc_dl, $node;
3042            push @links, \$anc_dl->[-1];  #  Ref to the just added descendent list entry
3043        }
3044    
3045        foreach my $tip ( @tips )
3046        {
3047            my $link    = $links[ int( rand( scalar @links ) ) ];
3048            my $newtip  = [ [], $tip, $len ];
3049            my $new_dl  = [ $$link, $newtip ];
3050            my $newnode = [ $new_dl, undef, $len ];
3051            $$link = $newnode;
3052            push @links, \$new_dl->[0], \$new_dl->[1]
3053        }
3054    
3055        return $tree;
3056    }
3057    
3058    
3059    #-------------------------------------------------------------------------------
3060    #
3061    #   $tree = random_ultrametric_tree(  @tips, \%options )
3062    #   $tree = random_ultrametric_tree( \@tips, \%options )
3063    #   $tree = random_ultrametric_tree(  @tips )
3064    #   $tree = random_ultrametric_tree( \@tips )
3065    #
3066    #  Options:
3067    #
3068    #     depth => $root_to_tip_dist   # D = 1
3069    #
3070    #-------------------------------------------------------------------------------
3071    sub random_ultrametric_tree
3072    {
3073        my $opts = $_[ 0] && ref $_[ 0] eq 'HASH' ? shift
3074                 : $_[-1] && ref $_[-1] eq 'HASH' ? pop
3075                 :                                  {};
3076        return undef if ! defined $_[0];
3077    
3078        my @tips = ref $_[0] ? @{ $_[0] } : @_;
3079        return undef if @tips < 2;
3080    
3081        my $d2tip = $opts->{ depth } ||= 1;
3082    
3083        #  Random tip addition order (for rooted tree it matters):
3084    
3085        @tips = sort { rand() <=> 0.5 } @tips;
3086        my $tree = [ [ ], undef, 0 ];
3087    
3088        my $subtree_size = { $tree => 0 };  # total branch length of each subtree
3089    
3090        #  We start with root bifurcation:
3091    
3092        foreach my $tip ( splice( @tips, 0, 2 ) )
3093        {
3094            my $node = [ [], $tip, $d2tip ];
3095            push @{ $tree->[0] }, $node;
3096            $subtree_size->{ $node }  = $d2tip;
3097            $subtree_size->{ $tree } += $d2tip;
3098        }
3099    
3100        #  Add each remaining tip at $pos, measured along the contour length
3101        #  of the tree (with no retracing along branches).
3102    
3103        foreach my $tip ( @tips )
3104        {
3105            my $pos = rand( $subtree_size->{ $tree } );
3106            random_add_to_ultrametric_tree( $tree, $tip, $subtree_size, $pos, $d2tip );
3107        }
3108    
3109        return $tree;
3110    }
3111    
3112    
3113    sub random_add_to_ultrametric_tree
3114    {
3115        my ( $node, $tip, $subtree_size, $pos, $d2tip ) = @_;
3116        $node && $node->[0] && ref $node->[0] eq 'ARRAY' or die "Bad tree node passed to random_add_to_ultrametric_tree().\n";
3117    
3118        # Find the descendent line that it goes in:
3119    
3120        my $i;
3121        my $dl = $node->[0];
3122        my $size0 = $subtree_size->{ $dl->[0] };
3123        if ( $size0 > $pos ) { $i = 0 } else { $i = 1; $pos -= $size0 }
3124        my $desc = $dl->[$i];
3125    
3126        # Does it go within the subtree, or the branch to the subtree?
3127    
3128        my $len;
3129        my $added;
3130        if ( ( $len = $desc->[2] ) <= $pos )
3131        {
3132            $added = random_add_to_ultrametric_tree( $desc, $tip, $subtree_size, $pos - $len, $d2tip - $len );
3133        }
3134        else
3135        {
3136            # If not in subtree, then it goes in the branch to the descendent node
3137            #
3138            #     ----- node  ------------       node
3139            #       ^   /  \       ^             /  \
3140            #       |       \      | pos             \l1
3141            #       |        \     v                  \
3142            #       |      len\ ----------         newnode
3143            #       |          \                     /  \ l2
3144            # d2tip |           \                   /    \
3145            #       |           desc               /     desc
3146            #       |           /  \            l3/      /  \
3147            #       |          .    .            /      .    .
3148            #       v         .      .          /      .      .
3149            #     -----      .        .     newtip    .        .
3150    
3151            my $l1      = $pos;
3152            my $l2      = $len   - $pos;
3153            my $l3      = $d2tip - $pos;
3154            my $newtip  = [ [], $tip, $l3 ];
3155            my $newnode = [ [ $desc, $newtip ], undef, $l1 ];
3156            $dl->[$i]   = $newnode;
3157            $subtree_size->{ $newtip  } = $l3;
3158            $subtree_size->{ $newnode } = $subtree_size->{ $desc } + $l3;
3159            $desc->[2] = $l2;
3160            $subtree_size->{ $desc } -= $l1;
3161            $added = $l3;
3162        }
3163    
3164        #  New branch was inserted below this point:
3165    
3166        $subtree_size->{ $node } += $added;
3167        return $added;
3168    }
3169    
3170    
3171    
3172  #===============================================================================  #===============================================================================
3173  #  #
3174  #  Tree writing and reading  #  Tree writing and reading
3175  #  #
3176  #===============================================================================  #===============================================================================
3177  #  writeNewickTree( $tree [, $file ] )  #  writeNewickTree( $tree )
3178    #  writeNewickTree( $tree, $file )
3179    #  writeNewickTree( $tree, \*FH )
3180  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
3181  sub writeNewickTree {  sub writeNewickTree {
3182      my ( $tree, $file ) = @_;      my ( $tree, $file ) = @_;
3183      $file || ( $file = \*STDOUT );      my ( $fh, $close ) = open_output( $file );
3184      print  $file  ( strNewickTree( $tree ), "\n" );      $fh or return;
3185        print  $fh  ( strNewickTree( $tree ), "\n" );
3186        close $fh if $close;
3187  }  }
3188    
3189    
# Line 2167  Line 3326 
3326    
3327    
3328  #===============================================================================  #===============================================================================
3329    #  $tree  = read_newick_tree( $file )  # reads to a semicolon
3330    #  @trees = read_newick_trees( $file ) # reads to end of file
3331    #===============================================================================
3332    
3333    sub read_newick_tree
3334    {
3335        my $file = shift;
3336        my ( $fh, $close ) = open_input( $file );
3337        my $tree;
3338        my @lines = ();
3339        foreach ( <$fh> )
3340        {
3341            chomp;
3342            push @lines, $_;
3343            if ( /;/ )
3344            {
3345                $tree = parse_newick_tree_str( join( ' ', @lines ) );
3346                last;
3347            }
3348        }
3349        close $fh if $close;
3350    
3351        $tree;
3352    }
3353    
3354    
3355    sub read_newick_trees
3356    {
3357        my $file = shift;
3358        my ( $fh, $close ) = open_input( $file );
3359        my @trees = ();
3360        my @lines = ();
3361        foreach ( <$fh> )
3362        {
3363            chomp;
3364            push @lines, $_;
3365            if ( /;/ )
3366            {
3367                push @trees, parse_newick_tree_str( join( ' ', @lines ) );
3368                @lines = ()
3369            }
3370        }
3371        close $fh if $close;
3372    
3373        @trees;
3374    }
3375    
3376    
3377    #===============================================================================
3378  #  Tree reader adapted from the C language reader in fastDNAml  #  Tree reader adapted from the C language reader in fastDNAml
3379  #  #
3380  #  $tree = parse_newick_tree_str( $string )  #  $tree = parse_newick_tree_str( $string )
# Line 2337  Line 3545 
3545      #  Loop while it is a comment:      #  Loop while it is a comment:
3546      while ( substr( $s, $ind, 1 ) eq "[" ) {      while ( substr( $s, $ind, 1 ) eq "[" ) {
3547          $ind++;          $ind++;
3548            my $depth = 1;
3549            my $ind2  = $ind;
3550    
3551          #  Find end          #  Find end
3552          if ( substr( $s, $ind ) !~ /^([^]]*)\]/ ) {          while ( $depth > 0 )
3553            {
3554                if ( substr( $s, $ind2 ) =~ /^([^][]*\[)/ )     # nested [ ... ]
3555                {
3556                    $ind2 += length( $1 );  #  Points at char just past [
3557                    $depth++;               #  If nested comments are allowed
3558                }
3559                elsif ( substr( $s, $ind2 ) =~ /^([^][]*\])/ )  # close bracket
3560                {
3561                    $ind2 += length( $1 );  #  Points at char just past ]
3562                    $depth--;
3563                }
3564                else
3565                {
3566              treeParseError( "comment missing closing bracket '["              treeParseError( "comment missing closing bracket '["
3567                             . substr( $s, $ind ) . "'" )                             . substr( $s, $ind ) . "'" )
3568          }          }
3569          my $comment = $1;          }
3570    
3571          #  Save if it includes any "text"          my $comment = substr( $s, $ind, $ind2-$ind-1 );
3572          if ( $comment =~ m/\S/ ) { push @clist, $comment }          if ( $comment =~ m/\S/ ) { push @clist, $comment }
3573    
3574          $ind += length( $comment ) + 1;     #  Comment plus closing bracket          $ind = $ind2;
3575    
3576          #  Skip white space          #  Skip white space
3577          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }
# Line 2367  Line 3590 
3590  #===============================================================================  #===============================================================================
3591  #  Make a printer plot of a tree:  #  Make a printer plot of a tree:
3592  #  #
3593  #     $node   newick tree root node  #  printer_plot_newick( $node, $file, $width, $min_dx, $dy )
3594  #     $file   undef (= \*STDOUT), \*STDOUT, \*STDERR, or a file name.  #  printer_plot_newick( $node, $file, \%options )
3595  #     $width  the approximate characters for the tree without labels  #
3596  #     $min_dx the minimum horizontal branch length  #     $node   # newick tree root node
3597  #     $dy     the vertical space per taxon  #     $file   # undef = \*STDOUT, \*FH, or a file name.
3598  #  #     $width  # the approximate characters for the tree without labels (D = 68)
3599  #  printer_plot_newick( $node, $file (D=\*STDOUT), $width (D=68), $min_dx (D=2), $dy (D=1) )  #     $min_dx # the minimum horizontal branch length (D = 2)
3600  #===============================================================================  #     $dy     # the vertical space per taxon (D = 1, most compressed)
3601  sub printer_plot_newick {  #
3602      my ( $node, $file, $width, $min_dx, $dy ) = @_;  #  Options:
3603    #
3604      my ( $fh, $close );  #    dy     => nat_number    # the vertical space per taxon
3605      if ( ! defined( $file ) ) {  #    chars  => key           # line drawing character set:
3606          $fh = \*STDOUT;  #                            #     html_unicode
3607      }  #                            #     text (default)
3608      elsif ( $file =~ /^\*/ ) {  #    min_dx => whole_number  # the minimum horizontal branch length
3609          $fh = $file;  #    width  => whole_number  # approximate tree width without labels
3610      }  #
3611      else {  #===============================================================================
3612          open $fh, ">$file" or die "Could not open $file for writing printer_plot_newick\n";  sub printer_plot_newick
3613          $close = 1;  {
3614      }      my ( $node, $file, @opts ) = @_;
3615    
3616        my ( $fh, $close ) = open_output( $file );
3617        $fh or return;
3618    
3619        my $html = $opts[0] && ref($opts[0]) eq 'HASH'
3620                            && $opts[0]->{ chars }
3621                            && $opts[0]->{ chars } =~ /html/;
3622        print $fh '<PRE>' if $html;
3623        print $fh join( "\n", text_plot_newick( $node, @opts ) ), "\n";
3624        print $fh "</PRE>\n" if $html;
3625    
     print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";  
3626      if ( $close ) { close $fh }      if ( $close ) { close $fh }
3627  }  }
3628    
3629    
3630  #===============================================================================  #===============================================================================
3631    #  Character sets for printer plot trees:
3632    #-------------------------------------------------------------------------------
3633    
3634    my %char_set =
3635      ( text1     => { space  => ' ',
3636                       horiz  => '-',
3637                       vert   => '|',
3638                       el_d_r => '/',
3639                       el_u_r => '\\',
3640                       el_d_l => '\\',
3641                       el_u_l => '/',
3642                       tee_l  => '+',
3643                       tee_r  => '+',
3644                       tee_u  => '+',
3645                       tee_d  => '+',
3646                       half_l => '-',
3647                       half_r => '-',
3648                       half_u => '|',
3649                       half_d => '|',
3650                       cross  => '+',
3651                     },
3652        text2     => { space  => ' ',
3653                       horiz  => '-',
3654                       vert   => '|',
3655                       el_d_r => '+',
3656                       el_u_r => '+',
3657                       el_d_l => '+',
3658                       el_u_l => '+',
3659                       tee_l  => '+',
3660                       tee_r  => '+',
3661                       tee_u  => '+',
3662                       tee_d  => '+',
3663                       half_l => '-',
3664                       half_r => '-',
3665                       half_u => '|',
3666                       half_d => '|',
3667                       cross  => '+',
3668                     },
3669        html_box  => { space  => '&nbsp;',
3670                       horiz  => '&#9472;',
3671                       vert   => '&#9474;',
3672                       el_d_r => '&#9484;',
3673                       el_u_r => '&#9492;',
3674                       el_d_l => '&#9488;',
3675                       el_u_l => '&#9496;',
3676                       tee_l  => '&#9508;',
3677                       tee_r  => '&#9500;',
3678                       tee_u  => '&#9524;',
3679                       tee_d  => '&#9516;',
3680                       half_l => '&#9588;',
3681                       half_r => '&#9590;',
3682                       half_u => '&#9589;',
3683                       half_d => '&#9591;',
3684                       cross  => '&#9532;',
3685                     },
3686        utf8_box  => { space  => ' ',
3687                       horiz  => chr(226) . chr(148) . chr(128),
3688                       vert   => chr(226) . chr(148) . chr(130),
3689                       el_d_r => chr(226) . chr(148) . chr(140),
3690                       el_u_r => chr(226) . chr(148) . chr(148),
3691                       el_d_l => chr(226) . chr(148) . chr(144),
3692                       el_u_l => chr(226) . chr(148) . chr(152),
3693                       tee_l  => chr(226) . chr(148) . chr(164),
3694                       tee_r  => chr(226) . chr(148) . chr(156),
3695                       tee_u  => chr(226) . chr(148) . chr(180),
3696                       tee_d  => chr(226) . chr(148) . chr(172),
3697                       half_l => chr(226) . chr(149) . chr(180),
3698                       half_r => chr(226) . chr(149) . chr(182),
3699                       half_u => chr(226) . chr(149) . chr(181),
3700                       half_d => chr(226) . chr(149) . chr(183),
3701                       cross  => chr(226) . chr(148) . chr(188),
3702                     },
3703      );
3704    
3705    %{ $char_set{ html1 } } = %{ $char_set{ text1 } };
3706    $char_set{ html1 }->{ space } = '&nbsp;';
3707    
3708    %{ $char_set{ html2 } } = %{ $char_set{ text2 } };
3709    $char_set{ html2 }->{ space } = '&nbsp;';
3710    
3711    #  Define some synonyms
3712    
3713    $char_set{ html } = $char_set{ html_box };
3714    $char_set{ line } = $char_set{ utf8_box };
3715    $char_set{ symb } = $char_set{ utf8_box };
3716    $char_set{ text } = $char_set{ text1 };
3717    $char_set{ utf8 } = $char_set{ utf8_box };
3718    
3719    #  Define tree formats and synonyms
3720    
3721    my %tree_format =
3722        ( text         => 'text',
3723          tree_tab_lbl => 'tree_tab_lbl',
3724          tree_lbl     => 'tree_lbl',
3725          chrlist_lbl  => 'chrlist_lbl',
3726          raw          => 'chrlist_lbl',
3727        );
3728    
3729    #===============================================================================
3730  #  Make a text plot of a tree:  #  Make a text plot of a tree:
3731  #  #
3732  #     $node   newick tree root node  #  @lines = text_plot_newick( $node, $width, $min_dx, $dy )
3733  #     $width  the approximate characters for the tree without labels  #  @lines = text_plot_newick( $node, \%options )
3734  #     $min_dx the minimum horizontal branch length  #
3735  #     $dy     the vertical space per taxon  #     $node   # newick tree root node
3736    #     $width  # the approximate characters for the tree without labels (D = 68)
3737    #     $min_dx # the minimum horizontal branch length (D = 2)
3738    #     $dy     # the vertical space per taxon (D = 1, most compressed)
3739    #
3740    #  Options:
3741    #
3742    #    chars  => keyword       # the output character set for the tree
3743    #    dy     => nat_number    # the vertical space per taxon
3744    #    format => keyword       # output format of each line
3745    #    min_dx => whole_number  # the minimum horizontal branch length
3746    #    width  => whole_number  # approximate tree width without labels
3747    #
3748    #  Character sets:
3749    #
3750    #    html       #  synonym of html1
3751    #    html_box   #  html encoding of unicode box drawing characters
3752    #    html1      #  text1 with nonbreaking spaces
3753    #    html2      #  text2 with nonbreaking spaces
3754    #    line       #  synonym of utf8_box
3755    #    raw        #  pass out the internal representation
3756    #    symb       #  synonym of utf8_box
3757    #    text       #  synonym of text1 (Default)
3758    #    text1      #  ascii characters: - + | / \ and space
3759    #    text2      #  ascii characters: - + | + + and space
3760    #    utf8       #  synonym of utf8_box
3761    #    utf8_box   #  utf8 encoding of unicode box drawing characters
3762    #
3763    #  Formats for row lines:
3764    #
3765    #    text           #    $textstring              # Default
3766    #    tree_tab_lbl   #    $treestr \t $labelstr
3767    #    tree_lbl       # [  $treestr,  $labelstr ]
3768    #    chrlist_lbl    # [ \@treechar, $labelstr ]   # Forced with raw chars
3769    #    raw            #  synonym of chrlist_lbl
3770  #  #
 #  @textlines = text_plot_newick( $node, $width (D=68), $min_dx (D=2), $dy (D=1) )  
3771  #===============================================================================  #===============================================================================
3772  sub text_plot_newick {  sub text_plot_newick
3773      my ( $node, $width, $min_dx, $dy ) = @_;  {
3774        my $node = shift @_;
3775      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";
3776      defined( $min_dx ) and ( $min_dx >=  0 ) or $min_dx =  2;  
3777      defined(     $dy ) and (     $dy >=  1 ) or     $dy =  1;      my ( $opts, $width, $min_dx, $dy, $chars, $fmt );
3778      defined( $width  )                       or  $width = 68;      if ( $_[0] && ref $_[0] eq 'HASH' )
3779        {
3780            $opts = shift;
3781        }
3782        else
3783        {
3784            ( $width, $min_dx, $dy ) = @_;
3785            $opts = {};
3786        }
3787    
3788        $chars = $opts->{ chars } || '';
3789        my $charH;
3790        $charH = $char_set{ $chars } || $char_set{ 'text1' } if ( $chars ne 'raw' );
3791        my $is_box = $charH eq $char_set{ html_box }
3792                  || $charH eq $char_set{ utf8_box }
3793                  || $chars eq 'raw';
3794    
3795        $fmt = ( $chars eq 'raw' ) ? 'chrlist_lbl' : $opts->{ format };
3796        $fmt = $tree_format{ $fmt || '' } || 'text';
3797    
3798        $dy    ||= $opts->{ dy     } ||  1;
3799        $width ||= $opts->{ width  } || 68;
3800        $min_dx  = $opts->{ min_dx } if ( ! defined $min_dx || $min_dx < 0 );
3801        $min_dx  = $is_box ? 1 : 2   if ( ! defined $min_dx || $min_dx < 0 );
3802    
3803        #  Layout the tree:
3804    
3805      $min_dx = int( $min_dx );      $min_dx = int( $min_dx );
3806      $dy     = int( $dy );      $dy     = int( $dy );
# Line 2419  Line 3809 
3809      my $hash = {};      my $hash = {};
3810      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );
3811    
3812      # dump_tree_hash( $node, $hash ); exit;      #  Generate the lines of the tree-one by-one:
   
     #  Generate the lines of the tree one by one:  
3813    
3814      my ( $y1, $y2 ) = @{ $hash->{ $node } };      my ( $y1, $y2 ) = @{ $hash->{ $node } };
3815      map { text_tree_row( $node, $hash, $_, "", "+" ) } ( $y1 .. $y2 );      my @lines;
3816        foreach ( ( $y1 .. $y2 ) )
3817        {
3818            my $line = text_tree_row( $node, $hash, $_, [], 'tee_l', $dy >= 2 );
3819            my $lbl  = '';
3820            if ( @$line )
3821            {
3822                if ( $line->[-1] eq '' ) { pop @$line; $lbl = pop @$line }
3823                #  Translate tree characters
3824                @$line = map { $charH->{ $_ } } @$line if $chars ne 'raw';
3825            }
3826    
3827            # Convert to requested output format:
3828    
3829            push @lines, $fmt eq 'text'         ? join( '', @$line, ( $lbl ? " $lbl" : () ) )
3830                       : $fmt eq 'text_tab_lbl' ? join( '', @$line, "\t", $lbl )
3831                       : $fmt eq 'tree_lbl'     ? [ join( '', @$line ), $lbl ]
3832                       : $fmt eq 'chrlist_lbl'  ? [ $line, $lbl ]
3833                       :                          ();
3834        }
3835    
3836        # if ( $cells )
3837        # {
3838        #     my $nmax = 0;
3839        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
3840        #     foreach ( @lines )
3841        #     {
3842        #         @$_ = map { "<TD>$_</TD>" } @$_;
3843        #         my $span = $nmax - @$_ + 1;
3844        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
3845        #     }
3846        # }
3847        # elsif ( $tables )
3848        # {
3849        #     my $nmax = 0;
3850        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
3851        #     foreach ( @lines )
3852        #     {
3853        #         @$_ = map { "<TD>$_</TD>" } @$_;
3854        #         my $span = $nmax - @$_ + 1;
3855        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
3856        #     }
3857        # }
3858    
3859        wantarray ? @lines : \@lines;
3860  }  }
3861    
3862    
3863  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3864  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy )  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd )
3865  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3866  sub layout_printer_plot {  sub layout_printer_plot
3867      my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy ) = @_;  {
3868        my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd ) = @_;
3869      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";
3870      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";
3871    
3872      my $dx = newick_x( $node );      my $dx = newick_x( $node );
3873      if ( defined( $dx ) ) {      if ( defined( $dx ) ) {
3874          $dx *= $x_scale;          $dx *= $x_scale;
3875          $dx >= $min_dx or $dx = $min_dx;          $dx = $min_dx if $dx < $min_dx;
3876      }      }
3877      else {      else {
3878          $dx = ( $x0 > 0 ) ? $min_dx : 0;          $dx = ( $x0 > 0 ) ? $min_dx : 0;
# Line 2465  Line 3899 
3899          $ymax = $y0;          $ymax = $y0;
3900    
3901          foreach ( @dl ) {          foreach ( @dl ) {
3902              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy );              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy,
3903                                                              ( 2*@ylist < @dl ? 0.5001 : 0.4999 )
3904                                                            );
3905              push @ylist, $yi;              push @ylist, $yi;
3906              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }
3907          }          }
# Line 2475  Line 3911 
3911    
3912          $yn1 = $ylist[ 0];          $yn1 = $ylist[ 0];
3913          $yn2 = $ylist[-1];          $yn2 = $ylist[-1];
3914          $y = int( 0.5 * ( $yn1 + $yn2 ) + 0.4999 );          $y   = int( 0.5 * ( $yn1 + $yn2 ) + ( $yrnd || 0.4999 ) );
3915    
3916            #  Handle special case of internal node label. Put it between subtrees.
3917    
3918            if ( ( $dy >= 2 ) && newick_lbl( $node ) && ( @dl > 1 ) ) {
3919                #  Find the descendents $i1 and $i2 to put the branch between
3920                my $i2 = 1;
3921                while ( ( $i2+1 < @ylist ) && ( $ylist[$i2] < $y ) ) { $i2++ }
3922                my $i1 = $i2 - 1;
3923                #  Get bottom of subtree1 and top of subtree2:
3924                my $ymax1 = $hash->{ $dl[ $i1 ] }->[ 1 ];
3925                my $ymin2 = $hash->{ $dl[ $i2 ] }->[ 0 ];
3926                #  Midway between bottom of subtree1 and top of subtree2, with
3927                #  preferred rounding direction
3928                $y = int( 0.5 * ( $ymax1 + $ymin2 ) + ( $yrnd || 0.4999 ) );
3929            }
3930      }      }
3931    
3932      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );
# Line 2485  Line 3936 
3936  }  }
3937    
3938    
3939    #  What symbol do we get if we add a leftward line to some other symbol?
3940    
3941    my %with_left_line = ( space  => 'half_l',
3942                           horiz  => 'horiz',
3943                           vert   => 'tee_l',
3944                           el_d_r => 'tee_d',
3945                           el_u_r => 'tee_u',
3946                           el_d_l => 'el_d_l',
3947                           el_u_l => 'el_u_l',
3948                           tee_l  => 'tee_l',
3949                           tee_r  => 'cross',
3950                           tee_u  => 'tee_u',
3951                           tee_d  => 'tee_d',
3952                           half_l => 'half_l',
3953                           half_r => 'horiz',
3954                           half_u => 'el_u_l',
3955                           half_d => 'el_d_l',
3956                           cross  => 'cross',
3957                         );
3958    
3959    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3960    #  Produce a description of one line of a printer plot tree.
3961    #
3962    #  \@line = text_tree_row( $node, $hash, $row, \@line, $symb, $ilbl )
3963    #
3964    #     \@line is the character descriptions accumulated so far, one per array
3965    #          element, except for a label, which can be any number of characters.
3966    #          Labels are followed by an empty string, so if $line->[-1] eq '',
3967    #          then $line->[-2] is a label. The calling program translates the
3968    #          symbol names to output characters.
3969    #
3970    #     \@node is a newick tree node
3971    #     \%hash contains tree layout information
3972    #      $row  is the row number (y value) that we are building
3973    #      $symb is the plot symbol proposed for the current x and y position
3974    #      $ilbl is true if internal node labels are allowed
3975    #
3976    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3977    sub text_tree_row
3978    {
3979        my ( $node, $hash, $row, $line, $symb, $ilbl ) = @_;
3980    
3981        my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };
3982        if ( $row < $y1 || $row > $y2 ) { return $line }
3983    
3984        if ( @$line < $x0 ) { push @$line, ('space') x ( $x0 - @$line ) }
3985    
3986        if ( $row == $y ) {
3987            while ( @$line > $x0 ) { pop @$line }  # Actually 0-1 times
3988            push @$line, $symb,
3989                         ( ( $x > $x0 ) ? ('horiz') x ($x - $x0) : () );
3990        }
3991    
3992        elsif ( $row > $yn1 && $row < $yn2 ) {
3993            if ( @$line < $x ) { push @$line, ('space') x ( $x - @$line ), 'vert' }
3994            else               { $line->[$x] = 'vert' }
3995        }
3996    
3997        my @dl = newick_desc_list( $node );
3998    
3999        if ( @dl < 1 ) { push @$line, ( newick_lbl( $node ) || '' ), '' }
4000    
4001        else {
4002            my @list = map { [ $_, 'tee_r' ] } @dl;  # Line to the right
4003            if ( @list > 1 ) { #  Fix top and bottom sympbols
4004                $list[ 0]->[1] = 'el_d_r';
4005                $list[-1]->[1] = 'el_u_r';
4006            }
4007            elsif ( @list ) {  # Only one descendent
4008                $list[ 0]->[1] = 'half_r';
4009            }
4010            foreach ( @list ) {
4011                my ( $n, $s ) = @$_;
4012                if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {
4013                    $line = text_tree_row( $n, $hash, $row, $line, $s, $ilbl );
4014                }
4015            }
4016    
4017            if ( $row == $y ) {
4018                $line->[$x] = ( $line->[$x] eq 'horiz' ) ? 'tee_l'
4019                                                         : $with_left_line{ $line->[$x] };
4020                push( @$line, newick_lbl( $node ), '' ) if $ilbl && newick_lbl( $node );
4021            }
4022        }
4023    
4024        return $line;
4025    }
4026    
4027    
4028  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4029  #  Debug routine  #  Debug routine
4030  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Line 2510  Line 4050 
4050  }  }
4051    
4052    
4053  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #===============================================================================
4054  #  $line = text_tree_row( $node, $hash, $row, $line, $symb )  #  Open an input file stream:
4055  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #
4056  sub text_tree_row {  #     ( $handle, undef ) = open_input(       );  # \*STDIN
4057      my ( $node, $hash, $row, $line, $symb ) = @_;  #     ( $handle, undef ) = open_input( \*FH  );
4058    #     ( $handle, 1     ) = open_input( $file );  # need to close $handle
4059    #
4060    #===============================================================================
4061    sub open_input
4062    {
4063        my $file = shift;
4064        my $fh;
4065        if    ( ! defined $file || $file eq '' ) { return ( \*STDIN ) }
4066        elsif ( ref( $file ) eq 'GLOB' )         { return ( $file   ) }
4067        elsif ( open( $fh, "<$file" ) )          { return ( $fh, 1  ) } # Need to close
4068    
4069      my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };      print STDERR "gjonewick::open_input could not open '$file' for reading\n";
4070      if ( $row < $y1 || $row > $y2 ) { return $line }      return undef;
4071    }
4072    
     if ( length( $line ) < $x0 ) { $line .= " " x ( $x0 - length( $line ) ) }  
4073    
4074      if ( $row == $y ) {  #===============================================================================
4075          $line = substr( $line, 0, $x0 ) . $symb . (( $x > $x0 ) ? "-" x ($x - $x0) : "");  #  Open an output file stream:
4076    #
4077    #     ( $handle, undef ) = open_output(      );  # \*STDOUT
4078    #     ( $handle, undef ) = open_output( \*FH );
4079    #     ( $handle, 1     ) = open_output( $file ); # need to close $handle
4080    #
4081    #===============================================================================
4082    sub open_output
4083    {
4084        my $file = shift;
4085        my $fh;
4086        if    ( ! defined $file || $file eq '' ) { return ( \*STDOUT ) }
4087        elsif ( ref( $file ) eq 'GLOB' )         { return ( $file    ) }
4088        elsif ( ( open $fh, ">$file" ) )         { return ( $fh, 1   ) } # Need to close
4089    
4090        print STDERR "gjonewick::open_output could not open '$file' for writing\n";
4091        return undef;
4092      }      }
4093    
4094      elsif ( $row > $yn1 && $row < $yn2 ) {  
4095          if ( length( $line ) < $x ) { $line .= " " x ( $x - length( $line ) ) . "|" }  #===============================================================================
4096          else { substr( $line, $x ) = "|" }  #  Some subroutines copied from gjolists
4097    #===============================================================================
4098    #  Return the common prefix of two lists:
4099    #
4100    #  @common = common_prefix( \@list1, \@list2 )
4101    #-----------------------------------------------------------------------------
4102    sub common_prefix
4103    {
4104        my ($l1, $l2) = @_;
4105        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
4106        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
4107    
4108        my $i = 0;
4109        my $l1_i;
4110        while ( defined( $l1_i = $l1->[$i] ) && $l1_i eq $l2->[$i] ) { $i++ }
4111    
4112        return @$l1[ 0 .. ($i-1) ];  # perl handles negative range
4113      }      }
4114    
     my @dl = newick_desc_list( $node );  
4115    
4116      if ( @dl < 1 ) {  #-----------------------------------------------------------------------------
4117          $line .= " " . $node->[1];  #  Return the unique suffixes of each of two lists:
4118    #
4119    #  ( \@suffix1, \@suffix2 ) = unique_suffixes( \@list1, \@list2 )
4120    #-----------------------------------------------------------------------------
4121    sub unique_suffixes
4122    {
4123        my ($l1, $l2) = @_;
4124        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
4125        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
4126    
4127        my $i = 0;
4128        my @l1 = @$l1;
4129        my @l2 = @$l2;
4130        my $l1_i;
4131        while ( defined( $l1_i = $l1[$i] ) && $l1_i eq $l2[$i] ) { $i++ }
4132    
4133        splice @l1, 0, $i;
4134        splice @l2, 0, $i;
4135        return ( \@l1, \@l2 );
4136      }      }
4137    
     else {  
         my @list = map { [ $_, "+" ] } @dl;  #  Print symbol for line  
         $list[ 0]->[1] = "/";  
         $list[-1]->[1] = "\\";  
4138    
4139          foreach ( @list ) {  #-------------------------------------------------------------------------------
4140              my ( $n, $s ) = @$_;  #  List of values duplicated in a list (stable in order by second occurance):
4141              if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {  #
4142                  $line = text_tree_row( $n, $hash, $row, $line, $s );  #  @dups = duplicates( @list )
4143    #-------------------------------------------------------------------------------
4144    sub duplicates
4145    {
4146        my %cnt = ();
4147        grep { ++$cnt{$_} == 2 } @_;
4148              }              }
4149    
4150    
4151    #-------------------------------------------------------------------------------
4152    #  Randomize the order of a list:
4153    #
4154    #  @random = random_order( @list )
4155    #-------------------------------------------------------------------------------
4156    sub random_order
4157    {
4158        my ( $i, $j );
4159        for ( $i = @_ - 1; $i > 0; $i-- )
4160        {
4161            $j = int( ($i+1) * rand() );
4162            ( $_[$i], $_[$j] ) = ( $_[$j], $_[$i] ); # Interchange i and j
4163           }           }
4164    
4165          if ( $row == $y ) { substr( $line, $x, 1 ) = "+" }     @_;
4166      }      }
4167    
4168      return $line;  
4169    #-----------------------------------------------------------------------------
4170    #  Intersection of two or more sets:
4171    #
4172    #  @intersection = intersection( \@set1, \@set2, ... )
4173    #-----------------------------------------------------------------------------
4174    sub intersection
4175    {
4176        my $set = shift;
4177        my @intersection = @$set;
4178    
4179        foreach $set ( @_ )
4180        {
4181            my %set = map { $_ => 1 } @$set;
4182            @intersection = grep { exists $set{ $_ } } @intersection;
4183        }
4184    
4185        @intersection;
4186    }
4187    
4188    
4189    #-----------------------------------------------------------------------------
4190    #  Elements in set 1, but not set 2:
4191    #
4192    #  @difference = set_difference( \@set1, \@set2 )
4193    #-----------------------------------------------------------------------------
4194    sub set_difference
4195    {
4196        my ($set1, $set2) = @_;
4197        my %set2 = map { $_ => 1 } @$set2;
4198        grep { ! ( exists $set2{$_} ) } @$set1;
4199  }  }
4200    
4201    

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.22

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3