[Bio] / FigKernelPackages / gjonewicklib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjonewicklib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5, Tue Dec 19 17:32:48 2006 UTC revision 1.19, Tue Apr 13 20:33:16 2010 UTC
# Line 1  Line 1 
1    # This is a SAS component.
2    
3  #  #
4  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2007 University of Chicago and Fellowship
5  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
6  #  #
7  # This file is part of the SEED Toolkit.  # This file is part of the SEED Toolkit.
# Line 44  Line 46 
46  #  #
47  #     $tree = \@rootnode;  #     $tree = \@rootnode;
48  #  #
49  #     @node = ( \@desc,  #  reference to list of descendants  #     $node = [ \@desc,  #  reference to list of descendants
50  #                $label, #  node label  #                $label, #  node label
51  #                $x,     #  branch length  #                $x,     #  branch length
52  #               \@c1,    #  reference to comment list 1  #               \@c1,    #  reference to comment list 1
# Line 52  Line 54 
54  #               \@c3,    #  reference to comment list 3  #               \@c3,    #  reference to comment list 3
55  #               \@c4,    #  reference to comment list 4  #               \@c4,    #  reference to comment list 4
56  #               \@c5     #  reference to comment list 5  #               \@c5     #  reference to comment list 5
57  #             )  #             ]
58  #  #
59  #  At present, no routine tests or enforces the length of the list (a single  #  At present, no routine tests or enforces the length of the list (a single
60  #  element list could be a valid internal node).  #  element list could be a valid internal node).
# Line 63  Line 65 
65  #  time, but is different from the prolog representation.  #  time, but is different from the prolog representation.
66  #  #
67  #  #
68    #  Ross Overbeek has a different tree node structure:
69    #
70    #     $node = [ Label,
71    #               DistanceToParent,
72    #               [ ParentPointer, ChildPointer1, ... ],
73    #               [ Name1\tVal1, Name2\tVal2, ... ]
74    #             ]
75    #
76    #  So:
77    #
78    #===============================================================================
79    #  Tree format interconversion:
80    #===============================================================================
81    #
82    #  $bool      = is_overbeek_tree( $tree )
83    #  $bool      = is_gjonewick_tree( $tree )
84    #
85    #  $gjonewick = overbeek_to_gjonewick( $overbeek )
86    #  $overbeek  = gjonewick_to_overbeek( $gjonewick )
87    #
88  #===============================================================================  #===============================================================================
89  #  Tree data extraction:  #  Tree data extraction:
90  #===============================================================================  #===============================================================================
# Line 90  Line 112 
112  #  set_newick_c4( $noderef, $listref )  #  set_newick_c4( $noderef, $listref )
113  #  set_newick_c5( $noderef, $listref )  #  set_newick_c5( $noderef, $listref )
114  #  set_newick_desc_list( $noderef, @desclist )  #  set_newick_desc_list( $noderef, @desclist )
115  #  set_newick_desc_i( $noderef1, $i, $noderef2)  #  set_newick_desc_i( $noderef1, $i, $noderef2 )  # 1-based numbering
116    #
117    #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid
118  #  #
119  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root
120  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root
# Line 99  Line 123 
123  #  #
124  #  $n       = newick_tip_count( $noderef )  #  $n       = newick_tip_count( $noderef )
125  #  @tiprefs = newick_tip_ref_list( $noderef )  #  @tiprefs = newick_tip_ref_list( $noderef )
126    # \@tiprefs = newick_tip_ref_list( $noderef )
127  #  @tips    = newick_tip_list( $noderef )  #  @tips    = newick_tip_list( $noderef )
128    # \@tips    = newick_tip_list( $noderef )
129    #
130  #  $tipref  = newick_first_tip_ref( $noderef )  #  $tipref  = newick_first_tip_ref( $noderef )
131  #  $tip     = newick_first_tip( $noderef )  #  $tip     = newick_first_tip( $noderef )
132    #
133  #  @tips    = newick_duplicated_tips( $noderef )  #  @tips    = newick_duplicated_tips( $noderef )
134    # \@tips    = newick_duplicated_tips( $noderef )
135    #
136  #  $bool    = newick_tip_in_tree( $noderef, $tipname )  #  $bool    = newick_tip_in_tree( $noderef, $tipname )
137    #
138  #  @tips    = newick_shared_tips( $tree1, $tree2 )  #  @tips    = newick_shared_tips( $tree1, $tree2 )
139    # \@tips    = newick_shared_tips( $tree1, $tree2 )
140  #  #
141  #  $length  = newick_tree_length( $noderef )  #  $length  = newick_tree_length( $noderef )
142    #
143    #  %tip_distances = newick_tip_distances( $noderef )
144    # \%tip_distances = newick_tip_distances( $noderef )
145    #
146  #  $xmax    = newick_max_X( $noderef )  #  $xmax    = newick_max_X( $noderef )
147  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )
148  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )  #  ( $tipname, $xmax ) = newick_most_distant_tip_name( $noderef )
149    #
150    #  Tree tip insertion point (tip is on branch of length x that
151    #  is inserted into branch connecting node1 and node2, a distance
152    #  x1 from node1 and x2 from node2):
153    #
154    #  [ $node1, $x1, $node2, $x2, $x ] = newick_tip_insertion_point( $tree, $tip )
155    #
156    #  Standardized label for a node in terms of intersection of 3 lowest sorting
157    #  tips (sort is lower case):
158    #
159    #  @TipOrTips = std_node_name( $tree, $node )
160  #  #
161  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
162  #  Paths from root of tree:  #  Paths from root of tree:
# Line 140  Line 187 
187  #  $treecopy = copy_newick_tree( $tree )  #  $treecopy = copy_newick_tree( $tree )
188  #  #
189  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
190  #  The following modify the existing tree, and passibly any components of that  #  The following modify the existing tree, and possibly any components of that
191  #  tree that are reached by reference.  If the old version is still needed, copy  #  tree that are reached by reference.  If the old version is still needed, copy
192  #  before modifying.  #  before modifying.
193  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 157  Line 204 
204  #  $n_changed = newick_set_undefined_branches( $node, $x )  #  $n_changed = newick_set_undefined_branches( $node, $x )
205  #  $n_changed = newick_set_all_branches( $node, $x )  #  $n_changed = newick_set_all_branches( $node, $x )
206  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
207    #  $node      = newick_rescale_branches( $node, $factor )
208    #  $node      = newick_modify_branches( $node, \&function )
209    #  $node      = newick_modify_branches( $node, \&function, \@func_parms )
210    #
211    #  Modify comments:
212    #
213    #  $node = newick_strip_comments( $node )
214  #  #
215  #  Modify rooting and/or order:  #  Modify rooting and/or order:
216  #  #
# Line 170  Line 224 
224  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )
225  #  $newtree = reroot_newick_to_node( $tree, @node )  #  $newtree = reroot_newick_to_node( $tree, @node )
226  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
227  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
228    #  $newtree = reroot_newick_to_midpoint( $tree )           # unweighted
229    #  $newtree = reroot_newick_to_midpoint_w( $tree )         # weight by tips
230  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted
231  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips
232  #  $newtree = uproot_tip_rooted_newick( $tree )  #  $newtree = uproot_tip_rooted_newick( $tree )
233  #  $newtree = uproot_newick( $tree )  #  $newtree = uproot_newick( $tree )
234  #  #
235  #  $newtree = prune_from_newick( $tree, $tip )  #  $newtree = prune_from_newick( $tree, $tip )
236    #  $newtree = rooted_newick_subtree( $tree,  @tips )
237    #  $newtree = rooted_newick_subtree( $tree, \@tips )
238  #  $newtree = newick_subtree( $tree,  @tips )  #  $newtree = newick_subtree( $tree,  @tips )
239  #  $newtree = newick_subtree( $tree, \@tips )  #  $newtree = newick_subtree( $tree, \@tips )
240    #  $newtree = newick_covering_subtree( $tree,  @tips )
241    #  $newtree = newick_covering_subtree( $tree, \@tips )
242  #  #
243  #  $newtree = collapse_zero_length_branches( $tree )  #  $newtree = collapse_zero_length_branches( $tree )
244  #  #
245    #  $node = newick_insert_at_node( $node, $subtree )
246    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
247    #
248    #===============================================================================
249    #  Tree neighborhood: subtree of n tips to represent a larger tree.
250    #===============================================================================
251    #
252    #  Focus around root:
253    #
254    #  $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
255    #  $subtree = root_neighborhood_representative_tree( $tree, $n )
256    #  @tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
257    #  @tips    = root_neighborhood_representative_tips( $tree, $n )
258    # \@tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
259    # \@tips    = root_neighborhood_representative_tips( $tree, $n )
260    #
261    #  Focus around a tip insertion point (the tip is not in the subtree):
262    #
263    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
264    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
265    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
266    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
267    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
268    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
269    #
270  #===============================================================================  #===============================================================================
271  #  Tree reading and writing:  #  Tree reading and writing:
272  #===============================================================================  #===============================================================================
273    #  Write machine-readable trees:
274  #  #
275  #   writeNewickTree( $tree )  #   writeNewickTree( $tree )
276  #   writeNewickTree( $tree, $file )  #   writeNewickTree( $tree, $file )
277  #   writePrettyTree( $tree, $file )  #   writeNewickTree( $tree, \*FH )
278  #  fwriteNewickTree( $file, $tree )  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O
279  #  $treestring = swriteNewickTree( $tree )  #  $treestring = swriteNewickTree( $tree )
280  #  $treestring = formatNewickTree( $tree )  #  $treestring = formatNewickTree( $tree )
281    #
282    #  Write human-readable trees:
283    #
284  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )
285  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )
286  #  #
287    #  Read trees:
288    #
289    #  $tree  = read_newick_tree( $file )  # reads to a semicolon
290    #  @trees = read_newick_trees( $file ) # reads to end of file
291  #  $tree = parse_newick_tree_str( $string )  #  $tree = parse_newick_tree_str( $string )
292  #  #
293  #===============================================================================  #===============================================================================
294    
295    
296    use Carp;
297    use Data::Dumper;
298    use strict;
299    
300  require Exporter;  require Exporter;
301    
302  our @ISA = qw(Exporter);  our @ISA = qw(Exporter);
303  our @EXPORT = qw(  our @EXPORT = qw(
304            is_overbeek_tree
305            is_gjonewick_tree
306            overbeek_to_gjonewick
307            gjonewick_to_overbeek
308            newick_is_valid
309          newick_is_rooted          newick_is_rooted
310          newick_is_unrooted          newick_is_unrooted
311          tree_rooted_on_tip          tree_rooted_on_tip
312          newick_is_bifurcating          newick_is_bifurcating
313          newick_tip_count          newick_tip_count
314            newick_tip_ref_list
315          newick_tip_list          newick_tip_list
316    
317          newick_first_tip          newick_first_tip
318          newick_duplicated_tips          newick_duplicated_tips
319          newick_tip_in_tree          newick_tip_in_tree
320          newick_shared_tips          newick_shared_tips
321    
322          newick_tree_length          newick_tree_length
323            newick_tip_distances
324          newick_max_X          newick_max_X
325          newick_most_distant_tip_ref          newick_most_distant_tip_ref
326          newick_most_distant_tip_name          newick_most_distant_tip_name
327    
328            newick_tip_insertion_point
329    
330          std_newick_name          std_newick_name
331    
332          path_to_tip          path_to_tip
# Line 241  Line 348 
348          newick_set_undefined_branches          newick_set_undefined_branches
349          newick_set_all_branches          newick_set_all_branches
350          newick_fix_negative_branches          newick_fix_negative_branches
351            newick_rescale_branches
352            newick_modify_branches
353    
354            newick_strip_comments
355    
356          normalize_newick_tree          normalize_newick_tree
357          reverse_newick_tree          reverse_newick_tree
# Line 254  Line 365 
365          reroot_newick_next_to_tip          reroot_newick_next_to_tip
366          reroot_newick_to_node          reroot_newick_to_node
367          reroot_newick_to_node_ref          reroot_newick_to_node_ref
368            reroot_newick_between_nodes
369            reroot_newick_to_midpoint
370            reroot_newick_to_midpoint_w
371          reroot_newick_to_approx_midpoint          reroot_newick_to_approx_midpoint
372          reroot_newick_to_approx_midpoint_w          reroot_newick_to_approx_midpoint_w
373          uproot_tip_rooted_newick          uproot_tip_rooted_newick
374          uproot_newick          uproot_newick
375    
376          prune_from_newick          prune_from_newick
377            rooted_newick_subtree
378          newick_subtree          newick_subtree
379            newick_covering_subtree
380          collapse_zero_length_branches          collapse_zero_length_branches
381    
382            newick_insert_at_node
383            newick_insert_between_nodes
384    
385            root_neighborhood_representative_tree
386            root_neighborhood_representative_tips
387            tip_neighborhood_representative_tree
388            tip_neighborhood_representative_tips
389    
390          writeNewickTree          writeNewickTree
391          fwriteNewickTree          fwriteNewickTree
392          strNewickTree          strNewickTree
393          formatNewickTree          formatNewickTree
394    
395            read_newick_tree
396            read_newick_trees
397          parse_newick_tree_str          parse_newick_tree_str
398    
399          printer_plot_newick          printer_plot_newick
# Line 305  Line 432 
432          );          );
433    
434    
 use gjolists qw(  
         common_prefix  
         common_and_unique  
         unique_suffixes  
   
         unique_set  
         duplicates  
         random_order  
   
         union  
         intersection  
         set_difference  
         );  
   
   
 use strict;  
   
   
435  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
436  #  Internally used definitions  #  Internally used definitions
437  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 332  Line 441 
441    
442    
443  #===============================================================================  #===============================================================================
444    #  Interconvert overbeek and gjonewick trees:
445    #===============================================================================
446    
447    sub is_overbeek_tree { array_ref( $_[0] ) && array_ref( $_[0]->[2] ) }
448    
449    sub is_gjonewick_tree { array_ref( $_[0] ) && array_ref( $_[0]->[0] ) }
450    
451    sub overbeek_to_gjonewick
452    {
453        return () unless ref( $_[0] ) eq 'ARRAY';
454        my ( $lbl, $x, $desc ) = @{ $_[0] };
455        my ( undef, @desc ) = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : ();
456        [ [ map { overbeek_to_gjonewick( $_ ) } @desc ], $lbl, $x ]
457    }
458    
459    sub gjonewick_to_overbeek
460    {
461        return () unless ref( $_[0] ) eq 'ARRAY';
462        my ( $desc, $lbl, $x ) = @{ $_[0] };
463        my @desc = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : ();
464        my $parent = $_[1];
465        my $node = [ $lbl, $x, undef, [] ];
466        $node->[2] = [ $parent, map { gjonewick_to_overbeek( $_, $node ) } @desc ];
467        return $node;
468    }
469    
470    
471    #===============================================================================
472  #  Extract tree structure values:  #  Extract tree structure values:
473  #===============================================================================  #===============================================================================
474  #  #
# Line 351  Line 488 
488  #  #
489  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
490    
491  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]  sub newick_desc_ref { ref($_[0]) ? $_[0]->[0] : Carp::confess() }  # = ${$_[0]}[0]
492  sub newick_lbl      { $_[0]->[1] }  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }
493  sub newick_x        { $_[0]->[2] }  sub newick_x        { ref($_[0]) ? $_[0]->[2] : Carp::confess() }
494  sub newick_c1       { $_[0]->[3] }  sub newick_c1       { ref($_[0]) ? $_[0]->[3] : Carp::confess() }
495  sub newick_c2       { $_[0]->[4] }  sub newick_c2       { ref($_[0]) ? $_[0]->[4] : Carp::confess() }
496  sub newick_c3       { $_[0]->[5] }  sub newick_c3       { ref($_[0]) ? $_[0]->[5] : Carp::confess() }
497  sub newick_c4       { $_[0]->[6] }  sub newick_c4       { ref($_[0]) ? $_[0]->[6] : Carp::confess() }
498  sub newick_c5       { $_[0]->[7] }  sub newick_c5       { ref($_[0]) ? $_[0]->[7] : Carp::confess() }
499    
500  sub newick_desc_list {  sub newick_desc_list {
501      my $node = $_[0];      my $node = $_[0];
# Line 427  Line 564 
564  #===============================================================================  #===============================================================================
565  #  Some tree property tests:  #  Some tree property tests:
566  #===============================================================================  #===============================================================================
567    #  Tree is valid?
568    #
569    #  $bool = newick_is_valid( $node, $verbose )
570    #-------------------------------------------------------------------------------
571    sub newick_is_valid
572    {
573        my $node = shift;
574    
575        if ( ! array_ref( $node ) )
576        {
577            print STDERR "Node is not array reference\n" if $_[0];
578            return 0;
579        }
580    
581        my @node = @$node;
582        if ( ! @node )
583        {
584            print STDERR "Node is empty array reference\n" if $_[0];
585            return 0;
586        }
587    
588        # Must have descendant or label:
589    
590        if ( ! ( array_ref( $node[0] ) && @{ $node[0] } ) && ! $node[2] )
591        {
592            print STDERR "Node has neither descendant nor label\n" if $_[0];
593            return 0;
594        }
595    
596        #  If comments are present, they must be array references
597    
598        foreach ( ( @node > 3 ) ? @node[ 3 .. $#node ] : () )
599        {
600            if ( defined( $_ ) && ! array_ref( $_ ) )
601            {
602                print STDERR "Node has neither descendant or label\n" if $_[0];
603                return 0;
604            }
605        }
606    
607        #  Inspect the descendants:
608    
609        foreach ( array_ref( $node[0] ) ? @{ $node[0] } : () )
610        {
611            newick_is_valid( $_, @_ ) || return 0
612        }
613    
614        return 1;
615    }
616    
617    
618    #-------------------------------------------------------------------------------
619  #  Tree is rooted (2 branches at root node)?  #  Tree is rooted (2 branches at root node)?
620  #  #
621  #  $bool = newick_is_rooted( $node )  #  $bool = newick_is_rooted( $node )
# Line 512  Line 701 
701  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
702  #  List of tip nodes:  #  List of tip nodes:
703  #  #
704  #  @tips = newick_tip_ref_list( $node )  #  @tips = newick_tip_ref_list( $noderef )
705    # \@tips = newick_tip_ref_list( $noderef )
706  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
707  sub newick_tip_ref_list {  sub newick_tip_ref_list {
708      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
# Line 529  Line 719 
719          push @list, newick_tip_ref_list( $_, 1 );          push @list, newick_tip_ref_list( $_, 1 );
720      }      }
721    
722      @list;      wantarray ? @list : \@list;
723  }  }
724    
725    
# Line 537  Line 727 
727  #  List of tips:  #  List of tips:
728  #  #
729  #  @tips = newick_tip_list( $node )  #  @tips = newick_tip_list( $node )
730    # \@tips = newick_tip_list( $node )
731  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
732  sub newick_tip_list {  sub newick_tip_list {
733      map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );      my @tips = map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );
734        wantarray ? @tips : \@tips;
735  }  }
736    
737    
# Line 578  Line 770 
770  #  List of duplicated tip labels.  #  List of duplicated tip labels.
771  #  #
772  #  @tips = newick_duplicated_tips( $node )  #  @tips = newick_duplicated_tips( $node )
773    # \@tips = newick_duplicated_tips( $node )
774  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
775  sub newick_duplicated_tips {  sub newick_duplicated_tips {
776      duplicates( newick_tip_list( $_[0] ) );      my @tips = &duplicates( newick_tip_list( $_[0] ) );
777        wantarray ? @tips : \@tips;
778  }  }
779    
780    
# Line 611  Line 805 
805  #  Tips shared between 2 trees.  #  Tips shared between 2 trees.
806  #  #
807  #  @tips = newick_shared_tips( $tree1, $tree2 )  #  @tips = newick_shared_tips( $tree1, $tree2 )
808    # \@tips = newick_shared_tips( $tree1, $tree2 )
809  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
810  sub newick_shared_tips {  sub newick_shared_tips {
811      my ( $Tree1, $Tree2 ) = @_;      my ( $tree1, $tree2 ) = @_;
812      my ( @Tips1 ) = newick_tip_list( $Tree1 );      my $tips1 = newick_tip_list( $tree1 );
813      my ( @Tips2 ) = newick_tip_list( $Tree2 );      my $tips2 = newick_tip_list( $tree2 );
814      intersection( \@Tips1, \@Tips2 );      my @tips = &intersection( $tips1, $tips2 );
815        wantarray ? @tips : \@tips;
816  }  }
817    
818    
# Line 638  Line 834 
834    
835    
836  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
837    #  Hash of tip nodes and corresponding distances from root:
838    #
839    #   %tip_distances = newick_tip_distances( $node )
840    #  \%tip_distances = newick_tip_distances( $node )
841    #-------------------------------------------------------------------------------
842    sub newick_tip_distances
843    {
844        my ( $node, $x, $hash ) = @_;
845        my $root = ! $hash;
846        ref( $hash ) eq 'HASH' or $hash = {};
847    
848        $x ||= 0;
849        $x  += newick_x( $node ) || 0;
850    
851        #  Is it a tip?
852    
853        my $n_desc = newick_n_desc( $node );
854        if ( ! $n_desc )
855        {
856            $hash->{ newick_lbl( $node ) } = $x;
857            return $hash;
858        }
859    
860        #  Tree rooted on tip?
861    
862        if ( ( $n_desc == 1 ) && $root && ( newick_lbl( $node ) ) )
863        {
864            $hash->{ newick_lbl( $node ) } = 0;  # Distance to root is zero
865        }
866    
867        foreach ( newick_desc_list( $node ) )
868        {
869            newick_tip_distances( $_, $x, $hash );
870        }
871    
872        wantarray ? %$hash : $hash;
873    }
874    
875    
876    #-------------------------------------------------------------------------------
877  #  Tree max X.  #  Tree max X.
878  #  #
879  #  $xmax = newick_max_X( $node )  #  $xmax = newick_max_X( $node )
# Line 712  Line 948 
948    
949    
950  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
951    #  Tree tip insertion point (with standard node labels):
952    #
953    #  [ $node1, $x1, $node2, $x2, $x ]
954    #           = newick_tip_insertion_point( $tree, $tip )
955    #
956    #  Which means: tip is on a branch of length x that is inserted into the branch
957    #  connecting node1 and node2, at distance x1 from node1 and x2 from node2.
958    #
959    #                x1    +------ n1a (lowest sorting tip of this subtree)
960    #            +--------n1
961    #            |         +------n1b (lowest sorting tip of this subtree)
962    #  tip-------n
963    #        x   |       +------------- n2a (lowest sorting tip of this subtree)
964    #            +------n2
965    #               x2   +-------- n2b (lowest sorting tip of this subtree)
966    #
967    #  The designations of 1 vs 2, and a vs b are chosen such that:
968    #     n1a < n1b, and n2a < n2b, and n1a < n2a
969    #
970    #  Then the statandard description becomes:
971    #
972    #  [ [ $n1a, min(n1b,n2a), max(n1b,n2a) ], x1,
973    #    [ $n2a, min(n2b,n1a), max(n2b,n1a) ], x2,
974    #    x
975    #  ]
976    #
977    #-------------------------------------------------------------------------------
978    sub newick_tip_insertion_point
979    {
980        my ( $tree, $tip ) = @_;
981        $tree && $tip && ref( $tree ) eq 'ARRAY'    or return undef;
982        $tree = copy_newick_tree( $tree )           or return undef;
983        $tree = reroot_newick_to_tip( $tree, $tip ) or return undef;
984        my $node = $tree;
985    
986        my $x  = 0;                        # Distance to node
987        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
988        $node  = $dl->[0];                 # Node adjacent to tip
989        $dl    = newick_desc_ref( $node );
990        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
991        {
992            $node = $dl->[0];
993            $x   += newick_x( $node );
994            $dl   = newick_desc_ref( $node );
995        }
996        $x += newick_x( $node );
997    
998        #  We are now at the node that is the insertion point.
999        #  Is it a tip?
1000    
1001        my @description;
1002    
1003        if ( ( ! $dl ) || @$dl == 0 )
1004        {
1005            @description = ( [ newick_lbl( $node ) ], 0, undef, 0, $x );
1006        }
1007    
1008        #  Is it a trifurcation or greater, in which case it does not go
1009        #  away with tip deletion?
1010    
1011        elsif ( @$dl > 2 )
1012        {
1013            @description = ( [ std_node_name( $node, $node ) ], 0, undef, 0, $x );
1014        }
1015    
1016        #  The node is bifurcating.  We need to describe it.
1017    
1018        else
1019        {
1020            my ( $n1, $x1 ) = describe_descendant( $dl->[0] );
1021            my ( $n2, $x2 ) = describe_descendant( $dl->[1] );
1022    
1023            if ( @$n1 == 2 ) { push @$n1, $n2->[0] }
1024            if ( @$n2 == 2 )
1025            {
1026                @$n2 = sort { lc $a cmp lc $b } ( @$n2, $n1->[0] );
1027            }
1028            if ( @$n1 == 3 ) { @$n2 = sort { lc $a cmp lc $b } @$n2 }
1029            @description = ( $n1, $x1, $n2, $x2, $x );
1030        }
1031    
1032        return wantarray ? @description : \@description;
1033    }
1034    
1035    
1036    sub describe_descendant
1037    {
1038        my $node = shift;
1039    
1040        my $x  = 0;                        # Distance to node
1041        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
1042        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
1043        {
1044            $node = $dl->[0];
1045            $x   += newick_x( $node );
1046            $dl   = newick_desc_ref( $node );
1047        }
1048        $x += newick_x( $node );
1049    
1050        #  Is it a tip?  Return list of one tip;
1051    
1052        if ( ( ! $dl ) || @$dl == 0 )
1053        {
1054            return ( [ newick_lbl( $node ) ], $x );
1055        }
1056    
1057        #  Get tips of each descendent, keeping lowest sorting from each.
1058        #  Return the two lowest of those (the third will come from the
1059        #  other side of the original node).
1060    
1061        my @rep_tips = sort { lc $a cmp lc $b }
1062                       map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }
1063                       @$dl;
1064        return ( [ @rep_tips[0,1] ], $x );
1065    }
1066    
1067    
1068    #-------------------------------------------------------------------------------
1069  #  Standard node name:  #  Standard node name:
1070  #     Tip label if at a tip  #     Tip label if at a tip
1071  #     Three sorted tip labels intersecting at node, each being smallest  #     Three sorted tip labels intersecting at node, each being smallest
1072  #           of all the tips of their subtrees  #           of all the tips of their subtrees
1073  #  #
1074  #  @TipOrTips = std_node_name( $Tree, $Node )  #  @TipOrTips = std_node_name( $tree, $node )
1075  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1076  sub std_node_name {  sub std_node_name {
1077      my $tree = $_[0];      my $tree = $_[0];
# Line 734  Line 1088 
1088      #  Work through lists of tips in descendant subtrees, removing them from      #  Work through lists of tips in descendant subtrees, removing them from
1089      #  @rest, and keeping the best tip for each subtree.      #  @rest, and keeping the best tip for each subtree.
1090    
1091      my @rest = tips_in_newick( $tree );      my @rest = newick_tip_list( $tree );
1092      my @best = map {      my @best = map
1093              my @tips = sort { lc $a cmp lc $b } tips_in_newick( $_ );            {
1094              @rest = set_difference( \@rest, \@tips );              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );
1095                @rest = &set_difference( \@rest, \@tips );
1096              $tips[0];              $tips[0];
1097          } newick_desc_list( $noderef );          } newick_desc_list( $noderef );
1098    
# Line 825  Line 1180 
1180      my $imax = newick_n_desc( $node );      my $imax = newick_n_desc( $node );
1181      for ( my $i = 1; $i <= $imax; $i++ ) {      for ( my $i = 1; $i <= $imax; $i++ ) {
1182         @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );         @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );
1183         if ( @path ) { return @path }          return @path if @path;
1184      }      }
1185    
1186      ();  #  Not found      ();  #  Not found
# Line 856  Line 1211 
1211      @p2 && @p3 || return ();                             #  Were they found?      @p2 && @p3 || return ();                             #  Were they found?
1212    
1213      # Find the common prefix for each pair of paths      # Find the common prefix for each pair of paths
1214      my @p12 = common_prefix( \@p1, \@p2 );      my @p12 = &common_prefix( \@p1, \@p2 );
1215      my @p13 = common_prefix( \@p1, \@p3 );      my @p13 = &common_prefix( \@p1, \@p3 );
1216      my @p23 = common_prefix( \@p2, \@p3 );      my @p23 = &common_prefix( \@p2, \@p3 );
1217    
1218      # Return the longest common prefix of any two paths      # Return the longest common prefix of any two paths
1219      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :
# Line 909  Line 1264 
1264      @p1 && @p2 || return undef;                          # Were they found?      @p1 && @p2 || return undef;                          # Were they found?
1265    
1266      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1267      my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1268      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1269      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1270    
# Line 930  Line 1285 
1285    
1286      array_ref( $node ) && defined( $node1 )      array_ref( $node ) && defined( $node1 )
1287                         && defined( $node2 ) || return undef;                         && defined( $node2 ) || return undef;
1288      my @p1 = path_to_node( $node, $node1 );      my @p1 = path_to_node( $node, $node1 ) or return undef;
1289      my @p2 = path_to_node( $node, $node2 );      my @p2 = path_to_node( $node, $node2 ) or return undef;
     @p1 && @p2 || return undef;                          # Were they found?  
1290    
1291      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1292      my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1293      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1294      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1295    
# Line 1150  Line 1504 
1504      my ( $node, $x, $not_root ) = @_;      my ( $node, $x, $not_root ) = @_;
1505    
1506      my $n = 0;      my $n = 0;
1507      if ( $not_root ) {      if ( $not_root )
1508        {
1509          set_newick_x( $node, $x );          set_newick_x( $node, $x );
1510          $n++;          $n++;
1511      }      }
1512    
1513      foreach ( newick_desc_list( $node ) ) {      foreach ( newick_desc_list( $node ) )
1514        {
1515          $n += newick_set_all_branches( $_, $x, 1 );          $n += newick_set_all_branches( $_, $x, 1 );
1516      }      }
1517    
# Line 1164  Line 1520 
1520    
1521    
1522  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1523    #  Rescale all branch lenghts by factor.
1524    #
1525    #  $node = newick_rescale_branches( $node, $factor )
1526    #-------------------------------------------------------------------------------
1527    sub newick_rescale_branches {
1528        my ( $node, $factor ) = @_;
1529    
1530        my $x = newick_x( $node );
1531        set_newick_x( $node, $factor * $x ) if $x;
1532    
1533        foreach ( newick_desc_list( $node ) )
1534        {
1535            newick_rescale_branches( $_, $factor );
1536        }
1537    
1538        $node;
1539    }
1540    
1541    
1542    #-------------------------------------------------------------------------------
1543    #  Modify all branch lengths by a function.
1544    #
1545    #     $node = newick_modify_branches( $node, \&function )
1546    #     $node = newick_modify_branches( $node, \&function, \@func_parms )
1547    #
1548    #  Function must have form
1549    #
1550    #     $x2 = &$function( $x1 )
1551    #     $x2 = &$function( $x1, @$func_parms )
1552    #
1553    #-------------------------------------------------------------------------------
1554    sub newick_modify_branches {
1555        my ( $node, $func, $parm ) = @_;
1556    
1557        set_newick_x( $node, &$func( newick_x( $node ), ( $parm ? @$parm : () ) ) );
1558        foreach ( newick_desc_list( $node ) )
1559        {
1560            newick_modify_branches( $_, $func, $parm )
1561        }
1562    
1563        $node;
1564    }
1565    
1566    
1567    #-------------------------------------------------------------------------------
1568  #  Set negative branches to zero.  The original tree is modfied.  #  Set negative branches to zero.  The original tree is modfied.
1569  #  #
1570  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
# Line 1189  Line 1590 
1590    
1591    
1592  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1593    #  Remove comments from a newick tree (e.g., before writing for phylip).
1594    #
1595    #  $node = newick_strip_comments( $node )
1596    #-------------------------------------------------------------------------------
1597    sub newick_strip_comments {
1598        my ( $node ) = @_;
1599    
1600        @$node = @$node[ 0 .. 2 ];
1601        foreach ( newick_desc_list( $node ) ) { newick_strip_comments( $_ ) }
1602        $node;
1603    }
1604    
1605    
1606    #-------------------------------------------------------------------------------
1607  #  Normalize tree order (in place).  #  Normalize tree order (in place).
1608  #  #
1609  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )
# Line 1297  Line 1712 
1712      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
1713      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip
1714    
     #  Reorder this subtree:  
   
1715      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1716      if    ( $dir < 0 ) {                   #  Big group first  
1717          @$dl_ref = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;      #  Reorder this subtree (biggest subtrees to outside)
1718    
1719        if ( $dir )
1720        {
1721            #  Big group first
1722            my @dl = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;
1723    
1724            my ( @dl1, @dl2 );
1725            for ( my $i = 0; $i < $nd; $i++ ) {
1726                if ( $i & 1 ) { push @dl2, $dl[$i] } else { push @dl1, $dl[$i] }
1727      }      }
1728      elsif ( $dir > 0 ) {                   #  Small group first  
1729          @$dl_ref = sort { $cntref->{$a} <=> $cntref->{$b} } @$dl_ref;          @$dl_ref = ( $dir < 0 ) ? ( @dl1, reverse @dl2 )
1730                                    : ( @dl2, reverse @dl1 );
1731      }      }
1732    
1733      #  Reorder within descendant subtrees:      #  Reorder within descendant subtrees:
# Line 1336  Line 1759 
1759  #  #
1760  #  $tree = unaesthetic_newick_tree( $treeref, $dir )  #  $tree = unaesthetic_newick_tree( $treeref, $dir )
1761  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1762  sub unaesthetic_newick_tree {  sub unaesthetic_newick_tree
1763    {
1764      my ( $tree, $dir ) = @_;      my ( $tree, $dir ) = @_;
1765      my %cnt;      my %cnt;
1766    
# Line 1356  Line 1780 
1780  #           = 0 for no change, and  #           = 0 for no change, and
1781  #           > 0 for downward branch (small group first).  #           > 0 for downward branch (small group first).
1782  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1783  sub reorder_against_tip_count {  sub reorder_against_tip_count
1784    {
1785      my ( $node, $cntref, $dir ) = @_;      my ( $node, $cntref, $dir ) = @_;
1786    
1787      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1395  Line 1820 
1820  #  #
1821  #  $tree = random_order_newick_tree( $tree )  #  $tree = random_order_newick_tree( $tree )
1822  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1823  sub random_order_newick_tree {  sub random_order_newick_tree
1824    {
1825      my ( $node ) = @_;      my ( $node ) = @_;
1826    
1827      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1404  Line 1830 
1830      #  Reorder this subtree:      #  Reorder this subtree:
1831    
1832      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1833      @$dl_ref = random_order( @$dl_ref );      @$dl_ref = &random_order( @$dl_ref );
1834    
1835      #  Reorder descendants:      #  Reorder descendants:
1836    
# Line 1419  Line 1845 
1845  #  #
1846  #  $newtree = reroot_newick_by_path( @path )  #  $newtree = reroot_newick_by_path( @path )
1847  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1848  sub reroot_newick_by_path {  sub reroot_newick_by_path
1849    {
1850      my ( $node1, $path1, @rest ) = @_;      my ( $node1, $path1, @rest ) = @_;
1851      array_ref( $node1 ) || return undef;      #  Always expect a node      array_ref( $node1 ) || return undef;      #  Always expect a node
1852    
# Line 1432  Line 1859 
1859      #      #
1860      #      splice( @$dl1, $path1-1, 1 );      #      splice( @$dl1, $path1-1, 1 );
1861      #      #
1862      #  But this maintains the cyclic order of the nodes:      #  But the following maintains the cyclic order of the nodes:
1863    
1864      my $dl1 = newick_desc_ref( $node1 );      my $dl1 = newick_desc_ref( $node1 );
1865      my $nd1 = @$dl1;      my $nd1 = @$dl1;
# Line 1447  Line 1874 
1874    
1875      my $dl2 = newick_desc_ref( $node2 );      my $dl2 = newick_desc_ref( $node2 );
1876      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }
1877      else                     { set_newick_desc_list( $node2, [ $node1 ] ) }      else                     { set_newick_desc_list( $node2, $node1 ) }
1878    
1879      #  Move c1 comments from node 1 to node 2:      #  Move c1 comments from node 1 to node 2:
1880    
# Line 1527  Line 1954 
1954    
1955    
1956  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1957  #  Move root of tree to an approximate midpoint.  #  Reroot a newick tree along the path between 2 nodes:
1958  #  #
1959  #  $newtree = reroot_newick_to_approx_midpoint( $tree )  #  $tree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
1960  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1961  sub reroot_newick_to_approx_midpoint {  sub reroot_newick_between_nodes
1962      my ( $tree ) = @_;  {
1963        my ( $tree, $node1, $node2, $fraction ) = @_;
1964      #  Compile average tip to node distances assending      array_ref( $tree ) or return undef;
1965        $fraction >= 0 && $fraction <= 1 or return undef;
1966    
1967      my $dists1 = average_to_tips_1( $tree );      #  Find the paths to the nodes:
1968    
1969      #  Compile average tip to node distances descending, returning midpoint node      my @path1 = path_to_node( $tree, $node1 ) or return undef;
1970        my @path2 = path_to_node( $tree, $node2 ) or return undef;
1971    
1972      my $node = average_to_tips_2( $dists1, undef, undef );      reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction )
1973    }
1974    
     #  Reroot  
1975    
1976      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree  #-------------------------------------------------------------------------------
1977  }  #  Reroot a newick tree along the path between 2 nodes:
1978    #
1979    #  $tree = reroot_newick_between_node_refs( $tree, $node1, $node2, $fraction )
1980    #-------------------------------------------------------------------------------
1981    sub reroot_newick_between_node_refs
1982    {
1983        my ( $tree, $node1, $node2, $fraction ) = @_;
1984        array_ref( $tree ) or return undef;
1985    
1986        #  Find the paths to the nodes:
1987    
1988  sub average_to_tips_1 {      my @path1 = path_to_node_ref( $tree, $node1 ) or return undef;
1989      my ( $node ) = @_;      my @path2 = path_to_node_ref( $tree, $node2 ) or return undef;;
1990    
1991      my @desc_dists = map { average_to_tips_1( $_ ) } newick_desc_list( $node );      reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction )
     my $x_below = 0;  
     if ( @desc_dists )  
     {  
         foreach ( @desc_dists ) { $x_below += $_->[0] }  
         $x_below /= @desc_dists;  
1992      }      }
     my $x = newick_x( $node ) || 0;  
     my $x_net = $x_below + $x;  
1993    
     [ $x_net, $x, $x_below, [ @desc_dists ], $node ]  
 }  
1994    
1995    #-------------------------------------------------------------------------------
1996    #  Reroot a newick tree along the path between 2 nodes defined by paths:
1997    #
1998    #  $tree = reroot_newick_between_nodes_by_path( $tree, $path1, $path2, $fraction )
1999    #-------------------------------------------------------------------------------
2000    sub reroot_newick_between_nodes_by_path
2001    {
2002        my ( $tree, $path1, $path2, $fraction ) = @_;
2003        array_ref( $tree ) and array_ref( $path1 ) and  array_ref( $path2 )
2004           or return undef;
2005        $fraction >= 0 && $fraction <= 1 or return undef;
2006    
2007  sub average_to_tips_2 {      my @path1 = @$path1;
2008      my ( $dists1, $x_above, $anc_node ) = @_;      my @path2 = @$path2;
     my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;  
2009    
2010      #  Are we done?  Root is in this node's branch, or "above"?      #  Trim the common prefix, saving it:
2011    
2012      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      my @prefix = ();
2013      # print STDERR "x       = $x\n";      while ( defined( $path1[1] ) && defined( $path2[1] ) && ( $path1[1] == $path2[1] ) )
2014      # print STDERR "x_below = $x_below\n";      {
2015      # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";          push @prefix, splice( @path1, 0, 2 );
2016            splice( @path2, 0, 2 );
2017        }
2018    
2019      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      my ( @path, $dist );
2020        if    ( @path1 < 3 )
2021      {      {
2022          #  At this point the root can only be in this node's branch,          @path2 >= 3 or return undef;              # node1 = node2
2023          #  or "above" it in the current rooting of the tree (which          $dist = $fraction * newick_path_length( @path2 );
2024          #  would mean that the midpoint is actually down a different          @path = @path2;
2025        }
2026        elsif ( @path2 < 3 )
2027        {
2028            $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
2029            @path = @path1;
2030        }
2031        else
2032        {
2033            my $dist1 = newick_path_length( @path1 );
2034            my $dist2 = newick_path_length( @path2 );
2035            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2036            @path = ( $dist <= 0 ) ? @path1 : @path2;
2037            $dist = abs( $dist );
2038        }
2039    
2040        #  Descend tree until we reach the insertion branch:
2041    
2042        my $x;
2043        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
2044        {
2045            $dist -= $x;
2046            push @prefix, splice( @path, 0, 2 );
2047        }
2048    
2049        #  Insert the new node:
2050    
2051        my $newnode = [ [ $path[2] ], undef, $dist ];
2052        set_newick_desc_i( $path[0], $path[1], $newnode );
2053        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
2054    
2055        #  We can now build the path from root to the new node
2056    
2057        reroot_newick_by_path( @prefix, @path[0,1], $newnode );
2058    }
2059    
2060    
2061    #-------------------------------------------------------------------------------
2062    #  Move root of tree to an approximate midpoint.
2063    #
2064    #  $newtree = reroot_newick_to_approx_midpoint( $tree )
2065    #-------------------------------------------------------------------------------
2066    sub reroot_newick_to_approx_midpoint {
2067        my ( $tree ) = @_;
2068    
2069        #  Compile average tip to node distances assending
2070    
2071        my $dists1 = average_to_tips_1( $tree );
2072    
2073        #  Compile average tip to node distances descending, returning midpoint
2074        #  cadidates as a list of [ $node1, $node2, $fraction ]
2075    
2076        my @mids = average_to_tips_2( $dists1, undef, undef );
2077    
2078        #  Reroot to first midpoint candidate
2079    
2080        return $tree if ! @mids;
2081        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2082        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2083    }
2084    
2085    
2086    #-------------------------------------------------------------------------------
2087    #  Move root of tree to a midpoint.
2088    #
2089    #  $newtree = reroot_newick_to_midpoint( $tree )
2090    #-------------------------------------------------------------------------------
2091    sub reroot_newick_to_midpoint {
2092        my ( $tree ) = @_;
2093    
2094        #  Compile average tip to node distances assending
2095    
2096        my $dists1 = average_to_tips_1( $tree );
2097    
2098        #  Compile average tip to node distances descending, returning midpoint
2099        #  [ $node1, $node2, $fraction ]
2100    
2101        my @mids = average_to_tips_2( $dists1, undef, undef );
2102    
2103        @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2104    }
2105    
2106    
2107    #-------------------------------------------------------------------------------
2108    #  Compile average tip to node distances assending
2109    #-------------------------------------------------------------------------------
2110    sub average_to_tips_1 {
2111        my ( $node ) = @_;
2112    
2113        my @desc_dists = map { average_to_tips_1( $_ ) } newick_desc_list( $node );
2114        my $x_below = 0;
2115        if ( @desc_dists )
2116        {
2117            foreach ( @desc_dists ) { $x_below += $_->[0] }
2118            $x_below /= @desc_dists;
2119        }
2120    
2121        my $x = newick_x( $node ) || 0;
2122        my $x_net = $x_below + $x;
2123    
2124        [ $x_net, $x, $x_below, [ @desc_dists ], $node ]
2125    }
2126    
2127    
2128    #-------------------------------------------------------------------------------
2129    #  Compile average tip to node distances descending, returning midpoint as
2130    #  [ $node1, $node2, $fraction_of_dist_between ]
2131    #-------------------------------------------------------------------------------
2132    sub average_to_tips_2 {
2133        my ( $dists1, $x_above, $anc_node ) = @_;
2134        my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;
2135    
2136        #  Are we done?  Root is in this node's branch, or "above"?
2137    
2138        my @mids = ();
2139        if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2140        {
2141            #  At this point the root can only be in this node's branch,
2142            #  or "above" it in the current rooting of the tree (which
2143            #  would mean that the midpoint is actually down a different
2144          #  path from the root of the current tree).          #  path from the root of the current tree).
2145          #          #
2146          #  Is the root in the current branch?          #  Is the root in the current branch?
2147    
2148          if ( ( $x_below + $x ) >= $x_above )          if ( ( $x_below + $x ) >= $x_above )
2149          {          {
2150              return ( $x_above >= $x_below ) ? $anc_node : $node;              #  We will need to make a new node for the root, $fract of
2151          }              #  the way from $node to $anc_node:
2152          else              my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2153          {                                     : 0.5;
2154              return undef;              push @mids, [ $node, $anc_node, $fract ];
2155          }          }
2156      }      }
2157    
2158      #  The root must be somewhere below this node:      #  The root might be somewhere below this node:
2159    
2160      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );
2161      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );
# Line 1605  Line 2165 
2165          #  If input tree is tip_rooted, $n-1 can be 0, so:          #  If input tree is tip_rooted, $n-1 can be 0, so:
2166    
2167          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;
2168          my $root = average_to_tips_2( $_, $above2, $node );          push @mids, average_to_tips_2( $_, $above2, $node );
         if ( $root ) { return $root }  
2169      }      }
2170    
2171      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2172  }  }
2173    
2174    
# Line 1623  Line 2180 
2180  sub reroot_newick_to_approx_midpoint_w {  sub reroot_newick_to_approx_midpoint_w {
2181      my ( $tree ) = @_;      my ( $tree ) = @_;
2182    
2183        #  Compile average tip to node distances assending from tips
2184    
2185        my $dists1 = average_to_tips_1_w( $tree );
2186    
2187        #  Compile average tip to node distances descending, returning midpoints
2188    
2189        my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2190    
2191        #  Reroot to first midpoint candidate
2192    
2193        return $tree if ! @mids;
2194        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2195        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2196    }
2197    
2198    
2199    #-------------------------------------------------------------------------------
2200    #  Move root of tree to an approximate midpoint.  Weight by tips.
2201    #
2202    #  $newtree = reroot_newick_to_midpoint_w( $tree )
2203    #-------------------------------------------------------------------------------
2204    sub reroot_newick_to_midpoint_w {
2205        my ( $tree ) = @_;
2206    
2207      #  Compile average tip to node distances assending      #  Compile average tip to node distances assending
2208    
2209      my $dists1 = average_to_tips_1_w( $tree );      my $dists1 = average_to_tips_1_w( $tree );
2210    
2211      #  Compile average tip to node distances descending, returning midpoint node      #  Compile average tip to node distances descending, returning midpoint node
2212    
2213      my $node = average_to_tips_2_w( $dists1, undef, undef, undef );      my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2214    
2215      #  Reroot      #  Reroot at first candidate midpoint
2216    
2217      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree      @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2218  }  }
2219    
2220    
# Line 1654  Line 2235 
2235          }          }
2236          $x_below /= $n_below;          $x_below /= $n_below;
2237      }      }
2238    
2239      my $x = newick_x( $node ) || 0;      my $x = newick_x( $node ) || 0;
2240      my $x_net = $x_below + $x;      my $x_net = $x_below + $x;
2241    
# Line 1667  Line 2249 
2249    
2250      #  Are we done?  Root is in this node's branch, or "above"?      #  Are we done?  Root is in this node's branch, or "above"?
2251    
2252      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      my @mids = ();
     # print STDERR "x       = $x\n";  
     # print STDERR "x_below = $x_below\n";  
     # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";  
   
2253      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2254      {      {
2255          #  At this point the root can only be in this node's branch,          #  At this point the root can only be in this node's branch,
# Line 1679  Line 2257 
2257          #  would mean that the midpoint is actually down a different          #  would mean that the midpoint is actually down a different
2258          #  path from the root of the current tree).          #  path from the root of the current tree).
2259          #          #
2260          #  Is the root in the current branch?          #  Is their a root in the current branch?
2261    
2262          if ( ( $x_below + $x ) >= $x_above )          if ( ( $x_below + $x ) >= $x_above )
2263          {          {
2264              return ( $x_above >= $x_below ) ? $anc_node : $node;              #  We will need to make a new node for the root, $fract of
2265          }              #  the way from $node to $anc_node:
2266          else              my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2267          {                                     : 0.5;
2268              return undef;              push @mids, [ $node, $anc_node, $fract ];
2269          }          }
2270      }      }
2271    
# Line 1707  Line 2285 
2285    
2286          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )
2287                                   : 0;                                   : 0;
2288          my $root = average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );          push @mids, average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );
         if ( $root ) { return $root }  
2289      }      }
2290    
2291      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2292  }  }
2293    
2294    
# Line 1859  Line 2434 
2434      ( $tree, ( $collapse ? @new_desc : () ) );      ( $tree, ( $collapse ? @new_desc : () ) );
2435  }  }
2436    
2437    #-------------------------------------------------------------------------------
2438    #  Add a subtree to a newick tree node:
2439    #
2440    #  $node = newick_insert_at_node( $node, $subtree )
2441    #-------------------------------------------------------------------------------
2442    sub newick_insert_at_node
2443    {
2444        my ( $node, $subtree ) = @_;
2445        array_ref( $node ) && array_ref( $subtree ) or return undef;
2446    
2447        #  We could check validity of trees, but ....
2448    
2449        my $dl = newick_desc_ref( $node );
2450        if ( array_ref( $dl ) )
2451        {
2452            push @$dl, $subtree;
2453        }
2454        else
2455        {
2456            set_newick_desc_ref( $node, [ $subtree ] );
2457        }
2458        return $node;
2459    }
2460    
2461    
2462    #-------------------------------------------------------------------------------
2463    #  Insert a subtree into a newick tree along the path between 2 nodes:
2464    #
2465    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
2466    #-------------------------------------------------------------------------------
2467    sub newick_insert_between_nodes
2468    {
2469        my ( $tree, $subtree, $node1, $node2, $fraction ) = @_;
2470        array_ref( $tree ) && array_ref( $subtree ) or return undef;
2471        $fraction >= 0 && $fraction <= 1 or return undef;
2472    
2473        #  Find the paths to the nodes:
2474    
2475        my @path1 = path_to_node( $tree, $node1 ) or return undef;
2476        my @path2 = path_to_node( $tree, $node2 ) or return undef;
2477    
2478        #  Trim the common prefix:
2479    
2480        while ( $path1[1] == $path2[1] )
2481        {
2482            splice( @path1, 0, 2 );
2483            splice( @path2, 0, 2 );
2484        }
2485    
2486        my ( @path, $dist );
2487        if    ( @path1 < 3 )
2488        {
2489            @path2 >= 3 or return undef;              # node1 = node2
2490            $dist = $fraction * newick_path_length( @path2 );
2491            @path = @path2;
2492        }
2493        elsif ( @path2 < 3 )
2494        {
2495            $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
2496            @path = @path1;
2497        }
2498        else
2499        {
2500            my $dist1 = newick_path_length( @path1 );
2501            my $dist2 = newick_path_length( @path2 );
2502            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2503            @path = ( $dist <= 0 ) ? @path1 : @path2;
2504            $dist = abs( $dist );
2505        }
2506    
2507        #  Descend tree until we reach the insertion branch:
2508    
2509        my $x;
2510        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
2511        {
2512            $dist -= $x;
2513            splice( @path, 0, 2 );
2514        }
2515    
2516        #  Insert the new node:
2517    
2518        set_newick_desc_i( $path[0], $path[1], [ [ $path[2], $subtree ], undef, $dist ] );
2519        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
2520    
2521        return $tree;
2522    }
2523    
2524    
2525  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2526  #  Prune one or more tips from a tree:  #  Prune one or more tips from a tree:
# Line 1941  Line 2603 
2603    
2604    
2605  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2606    #  Produce a potentially rooted subtree with the desired tips:
2607    #
2608    #     Except for (some) tip nodes, the tree produced is a copy.
2609    #     There is no check that requested tips exist.
2610    #
2611    #  $newtree = rooted_newick_subtree( $tree,  @tips )
2612    #  $newtree = rooted_newick_subtree( $tree, \@tips )
2613    #-------------------------------------------------------------------------------
2614    sub rooted_newick_subtree {
2615        my ( $tr, @tips ) = @_;
2616        if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }
2617    
2618        if ( @tips < 2 ) { return undef }
2619        my $keephash = { map { ( $_, 1 ) } @tips };
2620        my $tr2 = subtree1( $tr, $keephash );
2621        $tr2->[2] = undef if $tr2;                   # undef root branch length
2622        $tr2;
2623    }
2624    
2625    
2626    #-------------------------------------------------------------------------------
2627  #  Produce a subtree with the desired tips:  #  Produce a subtree with the desired tips:
2628  #  #
2629  #     Except for (some) tip nodes, the tree produced is a copy.  #     Except for (some) tip nodes, the tree produced is a copy.
# Line 2014  Line 2697 
2697  }  }
2698    
2699    
2700    #-------------------------------------------------------------------------------
2701    #  The smallest subtree of rooted tree that includes @tips:
2702    #
2703    #    $node = newick_covering_subtree( $tree,  @tips )
2704    #    $node = newick_covering_subtree( $tree, \@tips )
2705    #-------------------------------------------------------------------------------
2706    
2707    sub newick_covering_subtree {
2708        my $tree = shift;
2709        my %tips = map { $_ => 1 } ( ( ref( $_[0] ) eq 'ARRAY' ) ? @{ $_[0] } : @_ );
2710    
2711        #  Return smallest covering node, if any:
2712    
2713        ( newick_covering_subtree( $tree, \%tips ) )[ 0 ];
2714    }
2715    
2716    
2717    sub newick_covering_subtree_1 {
2718        my ( $node, $tips ) = @_;
2719        my $n_cover = 0;
2720        my @desc = newick_desc_list( $node );
2721        if ( @desc )
2722        {
2723            foreach ( @desc )
2724            {
2725                my ( $subtree, $n ) = newick_covering_subtree_1( $_, $tips );
2726                return ( $subtree, $n ) if $subtree;
2727                $n_cover += $n;
2728            }
2729        }
2730        elsif ( $tips->{ newick_lbl( $node ) } )
2731        {
2732            $n_cover++;
2733        }
2734    
2735        #  If all tips are covered, return node
2736    
2737        ( $n_cover == keys %$tips ) ? ( $node, $n_cover ) : ( undef, $n_cover );
2738    }
2739    
2740    
2741    #===============================================================================
2742    #
2743    #  Representative subtrees
2744    #
2745    #===============================================================================
2746    #  Find subtree of size n representating vicinity of the root:
2747    #
2748    #   $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
2749    #   $subtree = root_neighborhood_representative_tree( $tree, $n )
2750    #
2751    #  Note that if $tree is rooted, then the subtree will also be.  This can have
2752    #  consequences on downstream programs.
2753    #-------------------------------------------------------------------------------
2754    sub root_neighborhood_representative_tree
2755    {
2756        my ( $tree, $n, $tip_priority ) = @_;
2757        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2758        if ( newick_tip_count( $tree ) <= $n ) { return $tree }
2759    
2760        $tip_priority ||= default_tip_priority( $tree );
2761        my @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2762                   root_proximal_newick_subtrees( $tree, $n );
2763    
2764        newick_subtree( copy_newick_tree( $tree ), \@tips );
2765    }
2766    
2767    
2768    #-------------------------------------------------------------------------------
2769    #  Find n tips to represent tree lineages in vicinity of another tip.
2770    #  Default tip priority is short total branch length.
2771    #
2772    #  \@tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2773    #   @tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2774    #  \@tips = root_neighborhood_representative_tips( $tree, $n )
2775    #   @tips = root_neighborhood_representative_tips( $tree, $n )
2776    #-------------------------------------------------------------------------------
2777    sub root_neighborhood_representative_tips
2778    {
2779        my ( $tree, $n, $tip_priority ) = @_;
2780        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2781    
2782        my @tips;
2783        if ( newick_tip_count( $tree ) <= $n )
2784        {
2785            @tips = newick_tip_list( $tree );
2786        }
2787        else
2788        {
2789            $tip_priority ||= default_tip_priority( $tree );
2790            @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2791                    root_proximal_newick_subtrees( $tree, $n );
2792        }
2793    
2794        wantarray ? @tips : \@tips;
2795    }
2796    
2797    
2798    #-------------------------------------------------------------------------------
2799    #  Find subtree of size n representating vicinity of a tip:
2800    #
2801    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
2802    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
2803    #-------------------------------------------------------------------------------
2804    sub tip_neighborhood_representative_tree
2805    {
2806        my ( $tree, $tip, $n, $tip_priority ) = @_;
2807        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2808        newick_tip_in_tree( $tree, $tip ) or return undef;
2809    
2810        my $tree1 = copy_newick_tree( $tree );
2811        if ( newick_tip_count( $tree1 ) - 1 <= $n )
2812        {
2813            return prune_from_newick( $tree1, $tip )
2814        }
2815    
2816        $tree1 = reroot_newick_to_tip( $tree1, $tip );
2817        $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2818        my @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2819        newick_subtree( copy_newick_tree( $tree ), \@tips );
2820    }
2821    
2822    
2823    #-------------------------------------------------------------------------------
2824    #  Find n tips to represent tree lineages in vicinity of another tip.
2825    #  Default tip priority is short total branch length.
2826    #
2827    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2828    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2829    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2830    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2831    #-------------------------------------------------------------------------------
2832    sub tip_neighborhood_representative_tips
2833    {
2834        my ( $tree, $tip, $n, $tip_priority ) = @_;
2835        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2836        newick_tip_in_tree( $tree, $tip ) or return undef;
2837    
2838        my @tips = newick_tip_list( $tree );
2839        if ( newick_tip_count( $tree ) - 1 <= $n )
2840        {
2841            @tips = grep { $_ ne $tip } @tips;
2842        }
2843        else
2844        {
2845            my $tree1 = copy_newick_tree( $tree );
2846            $tree1 = reroot_newick_to_tip( $tree1, $tip );
2847            $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2848            @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2849        }
2850    
2851        wantarray ? @tips : \@tips;
2852    }
2853    
2854    
2855    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2856    #  Anonymous hash of the negative distance from root to each tip:
2857    #
2858    #   \%tip_priority = default_tip_priority( $tree )
2859    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2860    sub default_tip_priority
2861    {
2862        my ( $tree ) = @_;
2863        my $tip_distances = newick_tip_distances( $tree ) || {};
2864        return { map { $_ => -$tip_distances->{$_} } keys %$tip_distances };
2865    }
2866    
2867    
2868    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2869    #  Select a tip from a subtree base on a priority value:
2870    #
2871    #    $tip = representative_tip_of_newick_node( $node, \%tip_priority )
2872    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2873    sub representative_tip_of_newick_node
2874    {
2875        my ( $node, $tip_priority ) = @_;
2876        my ( $tip ) = sort { $b->[1] <=> $a->[1] }   # The best
2877                      map  { [ $_, $tip_priority->{ $_ } ] }
2878                      newick_tip_list( $node );
2879        $tip->[0];                                   # Label from label-priority pair
2880    }
2881    
2882    
2883    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2884    #  Find n subtrees focused around the root of a tree.  Typically each will
2885    #  then be reduced to a single tip to make a representative tree:
2886    #
2887    #   @subtrees = root_proximal_newick_subtrees( $tree, $n )
2888    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2889    sub root_proximal_newick_subtrees
2890    {
2891        my ( $tree, $n ) = @_;
2892        my $node_start_end = newick_branch_intervals( $tree );
2893        n_representative_branches( $n, $node_start_end );
2894    }
2895    
2896    
2897    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2898    #   @node_start_end = newick_branch_intervals( $tree )
2899    #  \@node_start_end = newick_branch_intervals( $tree )
2900    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2901    sub newick_branch_intervals
2902    {
2903        my ( $node, $parent_x ) = @_;
2904        $parent_x ||= 0;
2905        my ( $desc, undef, $dx ) = @$node;
2906        my $x = $parent_x + $dx;
2907        my $interval = [ $node, $parent_x, $desc && @$desc ? $x : 1e100 ];
2908        my @intervals = ( $interval,
2909                          map { &newick_branch_intervals( $_, $x ) } @$desc
2910                        );
2911        return wantarray ? @intervals : \@intervals;
2912    }
2913    
2914    
2915    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2916    #   @ids = n_representative_branches( $n,  @id_start_end )
2917    #   @ids = n_representative_branches( $n, \@id_start_end )
2918    #  \@ids = n_representative_branches( $n,  @id_start_end )
2919    #  \@ids = n_representative_branches( $n, \@id_start_end )
2920    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2921    sub n_representative_branches
2922    {
2923        my $n = shift;
2924        #  Sort intervals by start point:
2925        my @unprocessed = sort { $a->[1] <=> $b->[1] }
2926                          ( @_ == 1 ) ? @{ $_[0] } : @_;
2927        my @active = ();
2928        my ( $interval, $current_point );
2929        foreach $interval ( @unprocessed )
2930        {
2931            $current_point = $interval->[1];
2932            #  Filter out intervals that have ended.  This is N**2 in the number
2933            #  of representatives.  Fixing this would require maintaining a sorted
2934            #  active list.
2935            @active = grep { $_->[2] > $current_point } @active;
2936            push @active, $interval;
2937            last if ( @active >= $n );
2938        }
2939    
2940        my @ids = map { $_->[0] } @active;
2941        return wantarray() ? @ids : \@ids;
2942    }
2943    
2944    
2945  #===============================================================================  #===============================================================================
2946  #  #
2947  #  Tree writing and reading  #  Tree writing and reading
2948  #  #
2949  #===============================================================================  #===============================================================================
2950  #  writeNewickTree( $tree [, $file ] )  #  writeNewickTree( $tree )
2951    #  writeNewickTree( $tree, $file )
2952    #  writeNewickTree( $tree, \*FH )
2953  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2954  sub writeNewickTree {  sub writeNewickTree {
2955      my ( $tree, $file ) = @_;      my ( $tree, $file ) = @_;
2956      $file || ( $file = \*STDOUT );      my ( $fh, $close ) = open_output( $file );
2957      print  $file  ( strNewickTree( $tree ), "\n" );      $fh or return;
2958        print  $fh  ( strNewickTree( $tree ), "\n" );
2959        close $fh if $close;
2960  }  }
2961    
2962    
# Line 2167  Line 3099 
3099    
3100    
3101  #===============================================================================  #===============================================================================
3102    #  $tree  = read_newick_tree( $file )  # reads to a semicolon
3103    #  @trees = read_newick_trees( $file ) # reads to end of file
3104    #===============================================================================
3105    
3106    sub read_newick_tree
3107    {
3108        my $file = shift;
3109        my ( $fh, $close ) = open_input( $file );
3110        my $tree;
3111        my @lines = ();
3112        foreach ( <$fh> )
3113        {
3114            chomp;
3115            push @lines, $_;
3116            if ( /;/ )
3117            {
3118                $tree = parse_newick_tree_str( join( ' ', @lines ) );
3119                last;
3120            }
3121        }
3122        close $fh if $close;
3123    
3124        $tree;
3125    }
3126    
3127    
3128    sub read_newick_trees
3129    {
3130        my $file = shift;
3131        my ( $fh, $close ) = open_input( $file );
3132        my @trees = ();
3133        my @lines = ();
3134        foreach ( <$fh> )
3135        {
3136            chomp;
3137            push @lines, $_;
3138            if ( /;/ )
3139            {
3140                push @trees, parse_newick_tree_str( join( ' ', @lines ) );
3141                @lines = ()
3142            }
3143        }
3144        close $fh if $close;
3145    
3146        @trees;
3147    }
3148    
3149    
3150    #===============================================================================
3151  #  Tree reader adapted from the C language reader in fastDNAml  #  Tree reader adapted from the C language reader in fastDNAml
3152  #  #
3153  #  $tree = parse_newick_tree_str( $string )  #  $tree = parse_newick_tree_str( $string )
# Line 2337  Line 3318 
3318      #  Loop while it is a comment:      #  Loop while it is a comment:
3319      while ( substr( $s, $ind, 1 ) eq "[" ) {      while ( substr( $s, $ind, 1 ) eq "[" ) {
3320          $ind++;          $ind++;
3321            my $depth = 1;
3322            my $ind2  = $ind;
3323    
3324          #  Find end          #  Find end
3325          if ( substr( $s, $ind ) !~ /^([^]]*)\]/ ) {          while ( $depth > 0 )
3326            {
3327                if ( substr( $s, $ind2 ) =~ /^([^][]*\[)/ )     # nested [ ... ]
3328                {
3329                    $ind2 += length( $1 );  #  Points at char just past [
3330                    $depth++;               #  If nested comments are allowed
3331                }
3332                elsif ( substr( $s, $ind2 ) =~ /^([^][]*\])/ )  # close bracket
3333                {
3334                    $ind2 += length( $1 );  #  Points at char just past ]
3335                    $depth--;
3336                }
3337                else
3338                {
3339              treeParseError( "comment missing closing bracket '["              treeParseError( "comment missing closing bracket '["
3340                             . substr( $s, $ind ) . "'" )                             . substr( $s, $ind ) . "'" )
3341          }          }
3342          my $comment = $1;          }
3343    
3344          #  Save if it includes any "text"          my $comment = substr( $s, $ind, $ind2-$ind-1 );
3345          if ( $comment =~ m/\S/ ) { push @clist, $comment }          if ( $comment =~ m/\S/ ) { push @clist, $comment }
3346    
3347          $ind += length( $comment ) + 1;     #  Comment plus closing bracket          $ind = $ind2;
3348    
3349          #  Skip white space          #  Skip white space
3350          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }
# Line 2367  Line 3363 
3363  #===============================================================================  #===============================================================================
3364  #  Make a printer plot of a tree:  #  Make a printer plot of a tree:
3365  #  #
3366  #     $node   newick tree root node  #  printer_plot_newick( $node, $file, $width, $min_dx, $dy )
3367  #     $file   undef (= \*STDOUT), \*STDOUT, \*STDERR, or a file name.  #  printer_plot_newick( $node, $file, \%options )
3368  #     $width  the approximate characters for the tree without labels  #
3369  #     $min_dx the minimum horizontal branch length  #     $node   # newick tree root node
3370  #     $dy     the vertical space per taxon  #     $file   # undef = \*STDOUT, \*FH, or a file name.
3371  #  #     $width  # the approximate characters for the tree without labels (D = 68)
3372  #  printer_plot_newick( $node, $file (D=\*STDOUT), $width (D=68), $min_dx (D=2), $dy (D=1) )  #     $min_dx # the minimum horizontal branch length (D = 2)
3373  #===============================================================================  #     $dy     # the vertical space per taxon (D = 1, most compressed)
3374  sub printer_plot_newick {  #
3375      my ( $node, $file, $width, $min_dx, $dy ) = @_;  #  Options:
3376    #
3377      my ( $fh, $close );  #    dy     => nat_number    # the vertical space per taxon
3378      if ( ! defined( $file ) ) {  #    chars  => key           # line drawing character set:
3379          $fh = \*STDOUT;  #                            #     html_unicode
3380      }  #                            #     text (default)
3381      elsif ( $file =~ /^\*/ ) {  #    min_dx => whole_number  # the minimum horizontal branch length
3382          $fh = $file;  #    width  => whole_number  # approximate tree width without labels
3383      }  #
3384      else {  #===============================================================================
3385          open $fh, ">$file" or die "Could not open $file for writing printer_plot_newick\n";  sub printer_plot_newick
3386          $close = 1;  {
3387      }      my ( $node, $file, @opts ) = @_;
3388    
3389        my ( $fh, $close ) = open_output( $file );
3390        $fh or return;
3391    
3392        my $html = $opts[0] && ref($opts[0]) eq 'HASH'
3393                            && $opts[0]->{ chars }
3394                            && $opts[0]->{ chars } =~ /html/;
3395        print $fh '<PRE>' if $html;
3396        print $fh join( "\n", text_plot_newick( $node, @opts ) ), "\n";
3397        print $fh "</PRE>\n" if $html;
3398    
     print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";  
3399      if ( $close ) { close $fh }      if ( $close ) { close $fh }
3400  }  }
3401    
3402    
3403  #===============================================================================  #===============================================================================
3404    #  Character sets for printer plot trees:
3405    #-------------------------------------------------------------------------------
3406    
3407    my %char_set =
3408      ( text1     => { space  => ' ',
3409                       horiz  => '-',
3410                       vert   => '|',
3411                       el_d_r => '/',
3412                       el_u_r => '\\',
3413                       el_d_l => '\\',
3414                       el_u_l => '/',
3415                       tee_l  => '+',
3416                       tee_r  => '+',
3417                       tee_u  => '+',
3418                       tee_d  => '+',
3419                       half_l => '-',
3420                       half_r => '-',
3421                       half_u => '|',
3422                       half_d => '|',
3423                       cross  => '+',
3424                     },
3425        text2     => { space  => ' ',
3426                       horiz  => '-',
3427                       vert   => '|',
3428                       el_d_r => '+',
3429                       el_u_r => '+',
3430                       el_d_l => '+',
3431                       el_u_l => '+',
3432                       tee_l  => '+',
3433                       tee_r  => '+',
3434                       tee_u  => '+',
3435                       tee_d  => '+',
3436                       half_l => '-',
3437                       half_r => '-',
3438                       half_u => '|',
3439                       half_d => '|',
3440                       cross  => '+',
3441                     },
3442        html_box  => { space  => '&nbsp;',
3443                       horiz  => '&#9472;',
3444                       vert   => '&#9474;',
3445                       el_d_r => '&#9484;',
3446                       el_u_r => '&#9492;',
3447                       el_d_l => '&#9488;',
3448                       el_u_l => '&#9496;',
3449                       tee_l  => '&#9508;',
3450                       tee_r  => '&#9500;',
3451                       tee_u  => '&#9524;',
3452                       tee_d  => '&#9516;',
3453                       half_l => '&#9588;',
3454                       half_r => '&#9590;',
3455                       half_u => '&#9589;',
3456                       half_d => '&#9591;',
3457                       cross  => '&#9532;',
3458                     },
3459        utf8_box  => { space  => ' ',
3460                       horiz  => chr(226) . chr(148) . chr(128),
3461                       vert   => chr(226) . chr(148) . chr(130),
3462                       el_d_r => chr(226) . chr(148) . chr(140),
3463                       el_u_r => chr(226) . chr(148) . chr(148),
3464                       el_d_l => chr(226) . chr(148) . chr(144),
3465                       el_u_l => chr(226) . chr(148) . chr(152),
3466                       tee_l  => chr(226) . chr(148) . chr(164),
3467                       tee_r  => chr(226) . chr(148) . chr(156),
3468                       tee_u  => chr(226) . chr(148) . chr(180),
3469                       tee_d  => chr(226) . chr(148) . chr(172),
3470                       half_l => chr(226) . chr(149) . chr(180),
3471                       half_r => chr(226) . chr(149) . chr(182),
3472                       half_u => chr(226) . chr(149) . chr(181),
3473                       half_d => chr(226) . chr(149) . chr(183),
3474                       cross  => chr(226) . chr(148) . chr(188),
3475                     },
3476      );
3477    
3478    %{ $char_set{ html1 } } = %{ $char_set{ text1 } };
3479    $char_set{ html1 }->{ space } = '&nbsp;';
3480    
3481    %{ $char_set{ html2 } } = %{ $char_set{ text2 } };
3482    $char_set{ html2 }->{ space } = '&nbsp;';
3483    
3484    #  Define some synonyms
3485    
3486    $char_set{ html } = $char_set{ html_box };
3487    $char_set{ line } = $char_set{ utf8_box };
3488    $char_set{ symb } = $char_set{ utf8_box };
3489    $char_set{ text } = $char_set{ text1 };
3490    $char_set{ utf8 } = $char_set{ utf8_box };
3491    
3492    #  Define tree formats and synonyms
3493    
3494    my %tree_format =
3495        ( text         => 'text',
3496          tree_tab_lbl => 'tree_tab_lbl',
3497          tree_lbl     => 'tree_lbl',
3498          chrlist_lbl  => 'chrlist_lbl',
3499          raw          => 'chrlist_lbl',
3500        );
3501    
3502    #===============================================================================
3503  #  Make a text plot of a tree:  #  Make a text plot of a tree:
3504  #  #
3505  #     $node   newick tree root node  #  @lines = text_plot_newick( $node, $width, $min_dx, $dy )
3506  #     $width  the approximate characters for the tree without labels  #  @lines = text_plot_newick( $node, \%options )
3507  #     $min_dx the minimum horizontal branch length  #
3508  #     $dy     the vertical space per taxon  #     $node   # newick tree root node
3509    #     $width  # the approximate characters for the tree without labels (D = 68)
3510    #     $min_dx # the minimum horizontal branch length (D = 2)
3511    #     $dy     # the vertical space per taxon (D = 1, most compressed)
3512    #
3513    #  Options:
3514    #
3515    #    chars  => keyword       # the output character set for the tree
3516    #    dy     => nat_number    # the vertical space per taxon
3517    #    format => keyword       # output format of each line
3518    #    min_dx => whole_number  # the minimum horizontal branch length
3519    #    width  => whole_number  # approximate tree width without labels
3520    #
3521    #  Character sets:
3522    #
3523    #    html       #  synonym of html1
3524    #    html_box   #  html encoding of unicode box drawing characters
3525    #    html1      #  text1 with nonbreaking spaces
3526    #    html2      #  text2 with nonbreaking spaces
3527    #    line       #  synonym of utf8_box
3528    #    raw        #  pass out the internal representation
3529    #    symb       #  synonym of utf8_box
3530    #    text       #  synonym of text1 (Default)
3531    #    text1      #  ascii characters: - + | / \ and space
3532    #    text2      #  ascii characters: - + | + + and space
3533    #    utf8       #  synonym of utf8_box
3534    #    utf8_box   #  utf8 encoding of unicode box drawing characters
3535    #
3536    #  Formats for row lines:
3537    #
3538    #    text           #    $textstring              # Default
3539    #    tree_tab_lbl   #    $treestr \t $labelstr
3540    #    tree_lbl       # [  $treestr,  $labelstr ]
3541    #    chrlist_lbl    # [ \@treechar, $labelstr ]   # Forced with raw chars
3542    #    raw            #  synonym of chrlist_lbl
3543  #  #
 #  @textlines = text_plot_newick( $node, $width (D=68), $min_dx (D=2), $dy (D=1) )  
3544  #===============================================================================  #===============================================================================
3545  sub text_plot_newick {  sub text_plot_newick
3546      my ( $node, $width, $min_dx, $dy ) = @_;  {
3547        my $node = shift @_;
3548      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";
3549      defined( $min_dx ) and ( $min_dx >=  0 ) or $min_dx =  2;  
3550      defined(     $dy ) and (     $dy >=  1 ) or     $dy =  1;      my ( $opts, $width, $min_dx, $dy, $chars, $fmt );
3551      defined( $width  )                       or  $width = 68;      if ( $_[0] && ref $_[0] eq 'HASH' )
3552        {
3553            $opts = shift;
3554        }
3555        else
3556        {
3557            ( $width, $min_dx, $dy ) = @_;
3558            $opts = {};
3559        }
3560    
3561        $chars = $opts->{ chars } || '';
3562        my $charH;
3563        $charH = $char_set{ $chars } || $char_set{ 'text1' } if ( $chars ne 'raw' );
3564        my $is_box = $charH eq $char_set{ html_box }
3565                  || $charH eq $char_set{ utf8_box }
3566                  || $chars eq 'raw';
3567    
3568        $fmt = ( $chars eq 'raw' ) ? 'chrlist_lbl' : $opts->{ format };
3569        $fmt = $tree_format{ $fmt || '' } || 'text';
3570    
3571        $dy    ||= $opts->{ dy     } ||  1;
3572        $width ||= $opts->{ width  } || 68;
3573        $min_dx  = $opts->{ min_dx } if ( ! defined $min_dx || $min_dx < 0 );
3574        $min_dx  = $is_box ? 1 : 2   if ( ! defined $min_dx || $min_dx < 0 );
3575    
3576        #  Layout the tree:
3577    
3578      $min_dx = int( $min_dx );      $min_dx = int( $min_dx );
3579      $dy     = int( $dy );      $dy     = int( $dy );
# Line 2419  Line 3582 
3582      my $hash = {};      my $hash = {};
3583      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );
3584    
3585      # dump_tree_hash( $node, $hash ); exit;      #  Generate the lines of the tree-one by-one:
   
     #  Generate the lines of the tree one by one:  
3586    
3587      my ( $y1, $y2 ) = @{ $hash->{ $node } };      my ( $y1, $y2 ) = @{ $hash->{ $node } };
3588      map { text_tree_row( $node, $hash, $_, "", "+" ) } ( $y1 .. $y2 );      my @lines;
3589        foreach ( ( $y1 .. $y2 ) )
3590        {
3591            my $line = text_tree_row( $node, $hash, $_, [], 'tee_l', $dy >= 2 );
3592            my $lbl  = '';
3593            if ( @$line )
3594            {
3595                if ( $line->[-1] eq '' ) { pop @$line; $lbl = pop @$line }
3596                #  Translate tree characters
3597                @$line = map { $charH->{ $_ } } @$line if $chars ne 'raw';
3598            }
3599    
3600            # Convert to requested output format:
3601    
3602            push @lines, $fmt eq 'text'         ? join( '', @$line, ( $lbl ? " $lbl" : () ) )
3603                       : $fmt eq 'text_tab_lbl' ? join( '', @$line, "\t", $lbl )
3604                       : $fmt eq 'tree_lbl'     ? [ join( '', @$line ), $lbl ]
3605                       : $fmt eq 'chrlist_lbl'  ? [ $line, $lbl ]
3606                       :                          ();
3607        }
3608    
3609        # if ( $cells )
3610        # {
3611        #     my $nmax = 0;
3612        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
3613        #     foreach ( @lines )
3614        #     {
3615        #         @$_ = map { "<TD>$_</TD>" } @$_;
3616        #         my $span = $nmax - @$_ + 1;
3617        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
3618        #     }
3619        # }
3620        # elsif ( $tables )
3621        # {
3622        #     my $nmax = 0;
3623        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
3624        #     foreach ( @lines )
3625        #     {
3626        #         @$_ = map { "<TD>$_</TD>" } @$_;
3627        #         my $span = $nmax - @$_ + 1;
3628        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
3629        #     }
3630        # }
3631    
3632        wantarray ? @lines : \@lines;
3633  }  }
3634    
3635    
3636  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3637  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy )  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd )
3638  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3639  sub layout_printer_plot {  sub layout_printer_plot
3640      my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy ) = @_;  {
3641        my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd ) = @_;
3642      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";
3643      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";
3644    
3645      my $dx = newick_x( $node );      my $dx = newick_x( $node );
3646      if ( defined( $dx ) ) {      if ( defined( $dx ) ) {
3647          $dx *= $x_scale;          $dx *= $x_scale;
3648          $dx >= $min_dx or $dx = $min_dx;          $dx = $min_dx if $dx < $min_dx;
3649      }      }
3650      else {      else {
3651          $dx = ( $x0 > 0 ) ? $min_dx : 0;          $dx = ( $x0 > 0 ) ? $min_dx : 0;
# Line 2465  Line 3672 
3672          $ymax = $y0;          $ymax = $y0;
3673    
3674          foreach ( @dl ) {          foreach ( @dl ) {
3675              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy );              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy,
3676                                                              ( 2*@ylist < @dl ? 0.5001 : 0.4999 )
3677                                                            );
3678              push @ylist, $yi;              push @ylist, $yi;
3679              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }
3680          }          }
# Line 2475  Line 3684 
3684    
3685          $yn1 = $ylist[ 0];          $yn1 = $ylist[ 0];
3686          $yn2 = $ylist[-1];          $yn2 = $ylist[-1];
3687          $y = int( 0.5 * ( $yn1 + $yn2 ) + 0.4999 );          $y   = int( 0.5 * ( $yn1 + $yn2 ) + ( $yrnd || 0.4999 ) );
3688    
3689            #  Handle special case of internal node label. Put it between subtrees.
3690    
3691            if ( ( $dy >= 2 ) && newick_lbl( $node ) && ( @dl > 1 ) ) {
3692                #  Find the descendents $i1 and $i2 to put the branch between
3693                my $i2 = 1;
3694                while ( ( $i2+1 < @ylist ) && ( $ylist[$i2] < $y ) ) { $i2++ }
3695                my $i1 = $i2 - 1;
3696                #  Get bottom of subtree1 and top of subtree2:
3697                my $ymax1 = $hash->{ $dl[ $i1 ] }->[ 1 ];
3698                my $ymin2 = $hash->{ $dl[ $i2 ] }->[ 0 ];
3699                #  Midway between bottom of subtree1 and top of subtree2, with
3700                #  preferred rounding direction
3701                $y = int( 0.5 * ( $ymax1 + $ymin2 ) + ( $yrnd || 0.4999 ) );
3702            }
3703      }      }
3704    
3705      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );
# Line 2485  Line 3709 
3709  }  }
3710    
3711    
3712    #  What symbol do we get if we add a leftward line to some other symbol?
3713    
3714    my %with_left_line = ( space  => 'half_l',
3715                           horiz  => 'horiz',
3716                           vert   => 'tee_l',
3717                           el_d_r => 'tee_d',
3718                           el_u_r => 'tee_u',
3719                           el_d_l => 'el_d_l',
3720                           el_u_l => 'el_u_l',
3721                           tee_l  => 'tee_l',
3722                           tee_r  => 'cross',
3723                           tee_u  => 'tee_u',
3724                           tee_d  => 'tee_d',
3725                           half_l => 'half_l',
3726                           half_r => 'horiz',
3727                           half_u => 'el_u_l',
3728                           half_d => 'el_d_l',
3729                           cross  => 'cross',
3730                         );
3731    
3732    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3733    #  Produce a description of one line of a printer plot tree.
3734    #
3735    #  \@line = text_tree_row( $node, $hash, $row, \@line, $symb, $ilbl )
3736    #
3737    #     \@line is the character descriptions accumulated so far, one per array
3738    #          element, except for a label, which can be any number of characters.
3739    #          Labels are followed by an empty string, so if $line->[-1] eq '',
3740    #          then $line->[-2] is a label. The calling program translates the
3741    #          symbol names to output characters.
3742    #
3743    #     \@node is a newick tree node
3744    #     \%hash contains tree layout information
3745    #      $row  is the row number (y value) that we are building
3746    #      $symb is the plot symbol proposed for the current x and y position
3747    #      $ilbl is true if internal node labels are allowed
3748    #
3749    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3750    sub text_tree_row
3751    {
3752        my ( $node, $hash, $row, $line, $symb, $ilbl ) = @_;
3753    
3754        my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };
3755        if ( $row < $y1 || $row > $y2 ) { return $line }
3756    
3757        if ( @$line < $x0 ) { push @$line, ('space') x ( $x0 - @$line ) }
3758    
3759        if ( $row == $y ) {
3760            while ( @$line > $x0 ) { pop @$line }  # Actually 0-1 times
3761            push @$line, $symb,
3762                         ( ( $x > $x0 ) ? ('horiz') x ($x - $x0) : () );
3763        }
3764    
3765        elsif ( $row > $yn1 && $row < $yn2 ) {
3766            if ( @$line < $x ) { push @$line, ('space') x ( $x - @$line ), 'vert' }
3767            else               { $line->[$x] = 'vert' }
3768        }
3769    
3770        my @dl = newick_desc_list( $node );
3771    
3772        if ( @dl < 1 ) { push @$line, ( newick_lbl( $node ) || '' ), '' }
3773    
3774        else {
3775            my @list = map { [ $_, 'tee_r' ] } @dl;  # Line to the right
3776            if ( @list > 1 ) { #  Fix top and bottom sympbols
3777                $list[ 0]->[1] = 'el_d_r';
3778                $list[-1]->[1] = 'el_u_r';
3779            }
3780            elsif ( @list ) {  # Only one descendent
3781                $list[ 0]->[1] = 'half_r';
3782            }
3783            foreach ( @list ) {
3784                my ( $n, $s ) = @$_;
3785                if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {
3786                    $line = text_tree_row( $n, $hash, $row, $line, $s, $ilbl );
3787                }
3788            }
3789    
3790            if ( $row == $y ) {
3791                $line->[$x] = ( $line->[$x] eq 'horiz' ) ? 'tee_l'
3792                                                         : $with_left_line{ $line->[$x] };
3793                push( @$line, newick_lbl( $node ), '' ) if $ilbl && newick_lbl( $node );
3794            }
3795        }
3796    
3797        return $line;
3798    }
3799    
3800    
3801  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3802  #  Debug routine  #  Debug routine
3803  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Line 2510  Line 3823 
3823  }  }
3824    
3825    
3826  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #===============================================================================
3827  #  $line = text_tree_row( $node, $hash, $row, $line, $symb )  #  Open an input file stream:
3828  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #
3829  sub text_tree_row {  #     ( $handle, undef ) = open_input(       );  # \*STDIN
3830      my ( $node, $hash, $row, $line, $symb ) = @_;  #     ( $handle, undef ) = open_input( \*FH  );
3831    #     ( $handle, 1     ) = open_input( $file );  # need to close $handle
3832    #
3833    #===============================================================================
3834    sub open_input
3835    {
3836        my $file = shift;
3837        my $fh;
3838        if    ( ! defined( $file ) )     { return ( \*STDIN ) }
3839        elsif ( ref( $file ) eq 'GLOB' ) { return ( $file   ) }
3840        elsif ( open( $fh, "<$file" ) )  { return ( $fh, 1  ) } # Need to close
3841    
3842      my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };      print STDERR "gjonewick::open_input could not open '$file' for reading\n";
3843      if ( $row < $y1 || $row > $y2 ) { return $line }      return undef;
3844    }
3845    
     if ( length( $line ) < $x0 ) { $line .= " " x ( $x0 - length( $line ) ) }  
3846    
3847      if ( $row == $y ) {  #===============================================================================
3848          $line = substr( $line, 0, $x0 ) . $symb . (( $x > $x0 ) ? "-" x ($x - $x0) : "");  #  Open an output file stream:
3849    #
3850    #     ( $handle, undef ) = open_output(      );  # \*STDOUT
3851    #     ( $handle, undef ) = open_output( \*FH );
3852    #     ( $handle, 1     ) = open_output( $file ); # need to close $handle
3853    #
3854    #===============================================================================
3855    sub open_output
3856    {
3857        my $file = shift;
3858        my $fh;
3859        if    ( ! defined( $file ) )     { return ( \*STDOUT ) }
3860        elsif ( ref( $file ) eq 'GLOB' ) { return ( $file    ) }
3861        elsif ( ( open $fh, ">$file" ) ) { return ( $fh, 1   ) } # Need to close
3862    
3863        print STDERR "gjonewick::open_output could not open '$file' for writing\n";
3864        return undef;
3865      }      }
3866    
3867      elsif ( $row > $yn1 && $row < $yn2 ) {  
3868          if ( length( $line ) < $x ) { $line .= " " x ( $x - length( $line ) ) . "|" }  #===============================================================================
3869          else { substr( $line, $x ) = "|" }  #  Some subroutines copied from gjolists
3870    #===============================================================================
3871    #  Return the common prefix of two lists:
3872    #
3873    #  @common = common_prefix( \@list1, \@list2 )
3874    #-----------------------------------------------------------------------------
3875    sub common_prefix
3876    {
3877        my ($l1, $l2) = @_;
3878        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
3879        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
3880    
3881        my $i = 0;
3882        my $l1_i;
3883        while ( defined( $l1_i = $l1->[$i] ) && $l1_i eq $l2->[$i] ) { $i++ }
3884    
3885        return @$l1[ 0 .. ($i-1) ];  # perl handles negative range
3886      }      }
3887    
     my @dl = newick_desc_list( $node );  
3888    
3889      if ( @dl < 1 ) {  #-----------------------------------------------------------------------------
3890          $line .= " " . $node->[1];  #  Return the unique suffixes of each of two lists:
3891    #
3892    #  ( \@suffix1, \@suffix2 ) = unique_suffixes( \@list1, \@list2 )
3893    #-----------------------------------------------------------------------------
3894    sub unique_suffixes
3895    {
3896        my ($l1, $l2) = @_;
3897        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
3898        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
3899    
3900        my $i = 0;
3901        my @l1 = @$l1;
3902        my @l2 = @$l2;
3903        my $l1_i;
3904        while ( defined( $l1_i = $l1[$i] ) && $l1_i eq $l2[$i] ) { $i++ }
3905    
3906        splice @l1, 0, $i;
3907        splice @l2, 0, $i;
3908        return ( \@l1, \@l2 );
3909      }      }
3910    
     else {  
         my @list = map { [ $_, "+" ] } @dl;  #  Print symbol for line  
         $list[ 0]->[1] = "/";  
         $list[-1]->[1] = "\\";  
3911    
3912          foreach ( @list ) {  #-------------------------------------------------------------------------------
3913              my ( $n, $s ) = @$_;  #  List of values duplicated in a list (stable in order by second occurance):
3914              if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {  #
3915                  $line = text_tree_row( $n, $hash, $row, $line, $s );  #  @dups = duplicates( @list )
3916    #-------------------------------------------------------------------------------
3917    sub duplicates
3918    {
3919        my %cnt = ();
3920        grep { ++$cnt{$_} == 2 } @_;
3921    }
3922    
3923    
3924    #-------------------------------------------------------------------------------
3925    #  Randomize the order of a list:
3926    #
3927    #  @random = random_order( @list )
3928    #-------------------------------------------------------------------------------
3929    sub random_order
3930    {
3931        my ( $i, $j );
3932        for ( $i = @_ - 1; $i > 0; $i-- )
3933        {
3934            $j = int( ($i+1) * rand() );
3935            ( $_[$i], $_[$j] ) = ( $_[$j], $_[$i] ); # Interchange i and j
3936        }
3937    
3938       @_;
3939              }              }
3940    
3941    
3942    #-----------------------------------------------------------------------------
3943    #  Intersection of two or more sets:
3944    #
3945    #  @intersection = intersection( \@set1, \@set2, ... )
3946    #-----------------------------------------------------------------------------
3947    sub intersection
3948    {
3949        my $set = shift;
3950        my @intersection = @$set;
3951    
3952        foreach $set ( @_ )
3953        {
3954            my %set = map { $_ => 1 } @$set;
3955            @intersection = grep { exists $set{ $_ } } @intersection;
3956           }           }
3957    
3958          if ( $row == $y ) { substr( $line, $x, 1 ) = "+" }      @intersection;
3959      }      }
3960    
3961      return $line;  
3962    #-----------------------------------------------------------------------------
3963    #  Elements in set 1, but not set 2:
3964    #
3965    #  @difference = set_difference( \@set1, \@set2 )
3966    #-----------------------------------------------------------------------------
3967    sub set_difference
3968    {
3969        my ($set1, $set2) = @_;
3970        my %set2 = map { $_ => 1 } @$set2;
3971        grep { ! ( exists $set2{$_} ) } @$set1;
3972  }  }
3973    
3974    

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.19

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3