[Bio] / FigKernelPackages / gjonewicklib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjonewicklib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.6, Tue Dec 19 18:01:14 2006 UTC revision 1.15, Sun Sep 6 22:38:32 2009 UTC
# Line 1  Line 1 
1  #  #
2  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2007 University of Chicago and Fellowship
3  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
4  #  #
5  # This file is part of the SEED Toolkit.  # This file is part of the SEED Toolkit.
# Line 44  Line 44 
44  #  #
45  #     $tree = \@rootnode;  #     $tree = \@rootnode;
46  #  #
47  #     @node = ( \@desc,  #  reference to list of descendants  #     $node = [ \@desc,  #  reference to list of descendants
48  #                $label, #  node label  #                $label, #  node label
49  #                $x,     #  branch length  #                $x,     #  branch length
50  #               \@c1,    #  reference to comment list 1  #               \@c1,    #  reference to comment list 1
# Line 52  Line 52 
52  #               \@c3,    #  reference to comment list 3  #               \@c3,    #  reference to comment list 3
53  #               \@c4,    #  reference to comment list 4  #               \@c4,    #  reference to comment list 4
54  #               \@c5     #  reference to comment list 5  #               \@c5     #  reference to comment list 5
55  #             )  #             ]
56  #  #
57  #  At present, no routine tests or enforces the length of the list (a single  #  At present, no routine tests or enforces the length of the list (a single
58  #  element list could be a valid internal node).  #  element list could be a valid internal node).
# Line 63  Line 63 
63  #  time, but is different from the prolog representation.  #  time, but is different from the prolog representation.
64  #  #
65  #  #
66    #  Ross Overbeek has a different tree node structure:
67    #
68    #     $node = [ Label,
69    #               DistanceToParent,
70    #               [ ParentPointer, ChildPointer1, ... ],
71    #               [ Name1\tVal1, Name2\tVal2, ... ]
72    #             ]
73    #
74    #  So:
75    #
76    #===============================================================================
77    #  Tree format interconversion:
78    #===============================================================================
79    #
80    #  $bool      = is_overbeek_tree( $tree )
81    #  $bool      = is_gjonewick_tree( $tree )
82    #
83    #  $gjonewick = overbeek_to_gjonewick( $overbeek )
84    #  $overbeek  = gjonewick_to_overbeek( $gjonewick )
85    #
86  #===============================================================================  #===============================================================================
87  #  Tree data extraction:  #  Tree data extraction:
88  #===============================================================================  #===============================================================================
# Line 90  Line 110 
110  #  set_newick_c4( $noderef, $listref )  #  set_newick_c4( $noderef, $listref )
111  #  set_newick_c5( $noderef, $listref )  #  set_newick_c5( $noderef, $listref )
112  #  set_newick_desc_list( $noderef, @desclist )  #  set_newick_desc_list( $noderef, @desclist )
113  #  set_newick_desc_i( $noderef1, $i, $noderef2)  #  set_newick_desc_i( $noderef1, $i, $noderef2 )  # 1-based numbering
114    #
115    #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid
116  #  #
117  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root
118  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root
# Line 99  Line 121 
121  #  #
122  #  $n       = newick_tip_count( $noderef )  #  $n       = newick_tip_count( $noderef )
123  #  @tiprefs = newick_tip_ref_list( $noderef )  #  @tiprefs = newick_tip_ref_list( $noderef )
124    # \@tiprefs = newick_tip_ref_list( $noderef )
125  #  @tips    = newick_tip_list( $noderef )  #  @tips    = newick_tip_list( $noderef )
126    # \@tips    = newick_tip_list( $noderef )
127    #
128  #  $tipref  = newick_first_tip_ref( $noderef )  #  $tipref  = newick_first_tip_ref( $noderef )
129  #  $tip     = newick_first_tip( $noderef )  #  $tip     = newick_first_tip( $noderef )
130    #
131  #  @tips    = newick_duplicated_tips( $noderef )  #  @tips    = newick_duplicated_tips( $noderef )
132    # \@tips    = newick_duplicated_tips( $noderef )
133    #
134  #  $bool    = newick_tip_in_tree( $noderef, $tipname )  #  $bool    = newick_tip_in_tree( $noderef, $tipname )
135    #
136  #  @tips    = newick_shared_tips( $tree1, $tree2 )  #  @tips    = newick_shared_tips( $tree1, $tree2 )
137    # \@tips    = newick_shared_tips( $tree1, $tree2 )
138  #  #
139  #  $length  = newick_tree_length( $noderef )  #  $length  = newick_tree_length( $noderef )
140    #
141    #  %tip_distances = newick_tip_distances( $noderef )
142    # \%tip_distances = newick_tip_distances( $noderef )
143    #
144  #  $xmax    = newick_max_X( $noderef )  #  $xmax    = newick_max_X( $noderef )
145  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )
146  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )  #  ( $tipname, $xmax ) = newick_most_distant_tip_name( $noderef )
147    #
148    #  Tree tip insertion point (tip is on branch of length x that
149    #  is inserted into branch connecting node1 and node2, a distance
150    #  x1 from node1 and x2 from node2):
151    #
152    #  [ $node1, $x1, $node2, $x2, $x ] = newick_tip_insertion_point( $tree, $tip )
153    #
154    #  Standardized label for a node in terms of intersection of 3 lowest sorting
155    #  tips (sort is lower case):
156    #
157    #  @TipOrTips = std_node_name( $tree, $node )
158  #  #
159  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
160  #  Paths from root of tree:  #  Paths from root of tree:
# Line 140  Line 185 
185  #  $treecopy = copy_newick_tree( $tree )  #  $treecopy = copy_newick_tree( $tree )
186  #  #
187  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
188  #  The following modify the existing tree, and passibly any components of that  #  The following modify the existing tree, and possibly any components of that
189  #  tree that are reached by reference.  If the old version is still needed, copy  #  tree that are reached by reference.  If the old version is still needed, copy
190  #  before modifying.  #  before modifying.
191  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 157  Line 202 
202  #  $n_changed = newick_set_undefined_branches( $node, $x )  #  $n_changed = newick_set_undefined_branches( $node, $x )
203  #  $n_changed = newick_set_all_branches( $node, $x )  #  $n_changed = newick_set_all_branches( $node, $x )
204  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
205    #  $node      = newick_rescale_branches( $node, $factor )
206    #  $node      = newick_modify_branches( $node, \&function )
207    #  $node      = newick_modify_branches( $node, \&function, \@func_parms )
208    #
209    #  Modify comments:
210    #
211    #  $node = newick_strip_comments( $node )
212  #  #
213  #  Modify rooting and/or order:  #  Modify rooting and/or order:
214  #  #
# Line 170  Line 222 
222  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )
223  #  $newtree = reroot_newick_to_node( $tree, @node )  #  $newtree = reroot_newick_to_node( $tree, @node )
224  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
225  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
226    #  $newtree = reroot_newick_to_midpoint( $tree )           # unweighted
227    #  $newtree = reroot_newick_to_midpoint_w( $tree )         # weight by tips
228  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted
229  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips
230  #  $newtree = uproot_tip_rooted_newick( $tree )  #  $newtree = uproot_tip_rooted_newick( $tree )
231  #  $newtree = uproot_newick( $tree )  #  $newtree = uproot_newick( $tree )
232  #  #
233  #  $newtree = prune_from_newick( $tree, $tip )  #  $newtree = prune_from_newick( $tree, $tip )
234    #  $newtree = rooted_newick_subtree( $tree,  @tips )
235    #  $newtree = rooted_newick_subtree( $tree, \@tips )
236  #  $newtree = newick_subtree( $tree,  @tips )  #  $newtree = newick_subtree( $tree,  @tips )
237  #  $newtree = newick_subtree( $tree, \@tips )  #  $newtree = newick_subtree( $tree, \@tips )
238    #  $newtree = newick_covering_subtree( $tree,  @tips )
239    #  $newtree = newick_covering_subtree( $tree, \@tips )
240  #  #
241  #  $newtree = collapse_zero_length_branches( $tree )  #  $newtree = collapse_zero_length_branches( $tree )
242  #  #
243    #  $node = newick_insert_at_node( $node, $subtree )
244    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
245    #
246    #===============================================================================
247    #  Tree neighborhood: subtree of n tips to represent a larger tree.
248    #===============================================================================
249    #
250    #  Focus around root:
251    #
252    #  $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
253    #  $subtree = root_neighborhood_representative_tree( $tree, $n )
254    #  @tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
255    #  @tips    = root_neighborhood_representative_tips( $tree, $n )
256    # \@tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
257    # \@tips    = root_neighborhood_representative_tips( $tree, $n )
258    #
259    #  Focus around a tip insertion point (the tip is not in the subtree):
260    #
261    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
262    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
263    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
264    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
265    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
266    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
267    #
268  #===============================================================================  #===============================================================================
269  #  Tree reading and writing:  #  Tree reading and writing:
270  #===============================================================================  #===============================================================================
271    #  Write machine-readable trees:
272  #  #
273  #   writeNewickTree( $tree )  #   writeNewickTree( $tree )
274  #   writeNewickTree( $tree, $file )  #   writeNewickTree( $tree, $file )
275  #   writePrettyTree( $tree, $file )  #   writeNewickTree( $tree, \*FH )
276  #  fwriteNewickTree( $file, $tree )  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O
277  #  $treestring = swriteNewickTree( $tree )  #  $treestring = swriteNewickTree( $tree )
278  #  $treestring = formatNewickTree( $tree )  #  $treestring = formatNewickTree( $tree )
279    #
280    #  Write human-readable trees:
281    #
282  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )
283  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )
284  #  #
285    #  Read trees:
286    #
287    #  $tree  = read_newick_tree( $file )  # reads to a semicolon
288    #  @trees = read_newick_trees( $file ) # reads to end of file
289  #  $tree = parse_newick_tree_str( $string )  #  $tree = parse_newick_tree_str( $string )
290  #  #
291  #===============================================================================  #===============================================================================
292    
293    
294    use Carp;
295    use Data::Dumper;
296    use strict;
297    
298  require Exporter;  require Exporter;
299    
300  our @ISA = qw(Exporter);  our @ISA = qw(Exporter);
301  our @EXPORT = qw(  our @EXPORT = qw(
302            is_overbeek_tree
303            is_gjonewick_tree
304            overbeek_to_gjonewick
305            gjonewick_to_overbeek
306            newick_is_valid
307          newick_is_rooted          newick_is_rooted
308          newick_is_unrooted          newick_is_unrooted
309          tree_rooted_on_tip          tree_rooted_on_tip
310          newick_is_bifurcating          newick_is_bifurcating
311          newick_tip_count          newick_tip_count
312            newick_tip_ref_list
313          newick_tip_list          newick_tip_list
314    
315          newick_first_tip          newick_first_tip
316          newick_duplicated_tips          newick_duplicated_tips
317          newick_tip_in_tree          newick_tip_in_tree
318          newick_shared_tips          newick_shared_tips
319    
320          newick_tree_length          newick_tree_length
321            newick_tip_distances
322          newick_max_X          newick_max_X
323          newick_most_distant_tip_ref          newick_most_distant_tip_ref
324          newick_most_distant_tip_name          newick_most_distant_tip_name
325    
326            newick_tip_insertion_point
327    
328          std_newick_name          std_newick_name
329    
330          path_to_tip          path_to_tip
# Line 241  Line 346 
346          newick_set_undefined_branches          newick_set_undefined_branches
347          newick_set_all_branches          newick_set_all_branches
348          newick_fix_negative_branches          newick_fix_negative_branches
349            newick_rescale_branches
350            newick_modify_branches
351    
352            newick_strip_comments
353    
354          normalize_newick_tree          normalize_newick_tree
355          reverse_newick_tree          reverse_newick_tree
# Line 254  Line 363 
363          reroot_newick_next_to_tip          reroot_newick_next_to_tip
364          reroot_newick_to_node          reroot_newick_to_node
365          reroot_newick_to_node_ref          reroot_newick_to_node_ref
366            reroot_newick_between_nodes
367            reroot_newick_to_midpoint
368            reroot_newick_to_midpoint_w
369          reroot_newick_to_approx_midpoint          reroot_newick_to_approx_midpoint
370          reroot_newick_to_approx_midpoint_w          reroot_newick_to_approx_midpoint_w
371          uproot_tip_rooted_newick          uproot_tip_rooted_newick
372          uproot_newick          uproot_newick
373    
374          prune_from_newick          prune_from_newick
375            rooted_newick_subtree
376          newick_subtree          newick_subtree
377            newick_covering_subtree
378          collapse_zero_length_branches          collapse_zero_length_branches
379    
380            newick_insert_at_node
381            newick_insert_between_nodes
382    
383            root_neighborhood_representative_tree
384            root_neighborhood_representative_tips
385            tip_neighborhood_representative_tree
386            tip_neighborhood_representative_tips
387    
388          writeNewickTree          writeNewickTree
389          fwriteNewickTree          fwriteNewickTree
390          strNewickTree          strNewickTree
391          formatNewickTree          formatNewickTree
392    
393            read_newick_tree
394            read_newick_trees
395          parse_newick_tree_str          parse_newick_tree_str
396    
397          printer_plot_newick          printer_plot_newick
# Line 305  Line 430 
430          );          );
431    
432    
 use gjolists qw(  
         common_prefix  
         common_and_unique  
         unique_suffixes  
   
         unique_set  
         duplicates  
         random_order  
   
         union  
         intersection  
         set_difference  
         );  
   
   
 use strict;  
   
   
433  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
434  #  Internally used definitions  #  Internally used definitions
435  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 332  Line 439 
439    
440    
441  #===============================================================================  #===============================================================================
442    #  Interconvert overbeek and gjonewick trees:
443    #===============================================================================
444    
445    sub is_overbeek_tree { array_ref( $_[0] ) && array_ref( $_[0]->[2] ) }
446    
447    sub is_gjonewick_tree { array_ref( $_[0] ) && array_ref( $_[0]->[0] ) }
448    
449    sub overbeek_to_gjonewick
450    {
451        return () unless ref( $_[0] ) eq 'ARRAY';
452        my ( $lbl, $x, $desc ) = @{ $_[0] };
453        my ( undef, @desc ) = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : ();
454        [ [ map { overbeek_to_gjonewick( $_ ) } @desc ], $lbl, $x ]
455    }
456    
457    sub gjonewick_to_overbeek
458    {
459        return () unless ref( $_[0] ) eq 'ARRAY';
460        my ( $desc, $lbl, $x ) = @{ $_[0] };
461        my @desc = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : ();
462        my $parent = $_[1];
463        my $node = [ $lbl, $x, undef, [] ];
464        $node->[2] = [ $parent, map { gjonewick_to_overbeek( $_, $node ) } @desc ];
465        return $node;
466    }
467    
468    
469    #===============================================================================
470  #  Extract tree structure values:  #  Extract tree structure values:
471  #===============================================================================  #===============================================================================
472  #  #
# Line 351  Line 486 
486  #  #
487  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
488    
489  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]  sub newick_desc_ref { ref($_[0]) ? $_[0]->[0] : Carp::confess() }  # = ${$_[0]}[0]
490  sub newick_lbl      { $_[0]->[1] }  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }
491  sub newick_x        { $_[0]->[2] }  sub newick_x        { ref($_[0]) ? $_[0]->[2] : Carp::confess() }
492  sub newick_c1       { $_[0]->[3] }  sub newick_c1       { ref($_[0]) ? $_[0]->[3] : Carp::confess() }
493  sub newick_c2       { $_[0]->[4] }  sub newick_c2       { ref($_[0]) ? $_[0]->[4] : Carp::confess() }
494  sub newick_c3       { $_[0]->[5] }  sub newick_c3       { ref($_[0]) ? $_[0]->[5] : Carp::confess() }
495  sub newick_c4       { $_[0]->[6] }  sub newick_c4       { ref($_[0]) ? $_[0]->[6] : Carp::confess() }
496  sub newick_c5       { $_[0]->[7] }  sub newick_c5       { ref($_[0]) ? $_[0]->[7] : Carp::confess() }
497    
498  sub newick_desc_list {  sub newick_desc_list {
499      my $node = $_[0];      my $node = $_[0];
# Line 427  Line 562 
562  #===============================================================================  #===============================================================================
563  #  Some tree property tests:  #  Some tree property tests:
564  #===============================================================================  #===============================================================================
565    #  Tree is valid?
566    #
567    #  $bool = newick_is_valid( $node, $verbose )
568    #-------------------------------------------------------------------------------
569    sub newick_is_valid
570    {
571        my $node = shift;
572    
573        if ( ! array_ref( $node ) )
574        {
575            print STDERR "Node is not array reference\n" if $_[0];
576            return 0;
577        }
578    
579        my @node = @$node;
580        if ( ! @node )
581        {
582            print STDERR "Node is empty array reference\n" if $_[0];
583            return 0;
584        }
585    
586        # Must have descendant or label:
587    
588        if ( ! ( array_ref( $node[0] ) && @{ $node[0] } ) && ! $node[2] )
589        {
590            print STDERR "Node has neither descendant nor label\n" if $_[0];
591            return 0;
592        }
593    
594        #  If comments are present, they must be array references
595    
596        foreach ( ( @node > 3 ) ? @node[ 3 .. $#node ] : () )
597        {
598            if ( defined( $_ ) && ! array_ref( $_ ) )
599            {
600                print STDERR "Node has neither descendant or label\n" if $_[0];
601                return 0;
602            }
603        }
604    
605        #  Inspect the descendants:
606    
607        foreach ( array_ref( $node[0] ) ? @{ $node[0] } : () )
608        {
609            newick_is_valid( $_, @_ ) || return 0
610        }
611    
612        return 1;
613    }
614    
615    
616    #-------------------------------------------------------------------------------
617  #  Tree is rooted (2 branches at root node)?  #  Tree is rooted (2 branches at root node)?
618  #  #
619  #  $bool = newick_is_rooted( $node )  #  $bool = newick_is_rooted( $node )
# Line 512  Line 699 
699  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
700  #  List of tip nodes:  #  List of tip nodes:
701  #  #
702  #  @tips = newick_tip_ref_list( $node )  #  @tips = newick_tip_ref_list( $noderef )
703    # \@tips = newick_tip_ref_list( $noderef )
704  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
705  sub newick_tip_ref_list {  sub newick_tip_ref_list {
706      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
# Line 529  Line 717 
717          push @list, newick_tip_ref_list( $_, 1 );          push @list, newick_tip_ref_list( $_, 1 );
718      }      }
719    
720      @list;      wantarray ? @list : \@list;
721  }  }
722    
723    
# Line 537  Line 725 
725  #  List of tips:  #  List of tips:
726  #  #
727  #  @tips = newick_tip_list( $node )  #  @tips = newick_tip_list( $node )
728    # \@tips = newick_tip_list( $node )
729  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
730  sub newick_tip_list {  sub newick_tip_list {
731      map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );      my @tips = map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );
732        wantarray ? @tips : \@tips;
733  }  }
734    
735    
# Line 578  Line 768 
768  #  List of duplicated tip labels.  #  List of duplicated tip labels.
769  #  #
770  #  @tips = newick_duplicated_tips( $node )  #  @tips = newick_duplicated_tips( $node )
771    # \@tips = newick_duplicated_tips( $node )
772  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
773  sub newick_duplicated_tips {  sub newick_duplicated_tips {
774      duplicates( newick_tip_list( $_[0] ) );      my @tips = &duplicates( newick_tip_list( $_[0] ) );
775        wantarray ? @tips : \@tips;
776  }  }
777    
778    
# Line 611  Line 803 
803  #  Tips shared between 2 trees.  #  Tips shared between 2 trees.
804  #  #
805  #  @tips = newick_shared_tips( $tree1, $tree2 )  #  @tips = newick_shared_tips( $tree1, $tree2 )
806    # \@tips = newick_shared_tips( $tree1, $tree2 )
807  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
808  sub newick_shared_tips {  sub newick_shared_tips {
809      my ( $Tree1, $Tree2 ) = @_;      my ( $tree1, $tree2 ) = @_;
810      my ( @Tips1 ) = newick_tip_list( $Tree1 );      my $tips1 = newick_tip_list( $tree1 );
811      my ( @Tips2 ) = newick_tip_list( $Tree2 );      my $tips2 = newick_tip_list( $tree2 );
812      intersection( \@Tips1, \@Tips2 );      my @tips = &intersection( $tips1, $tips2 );
813        wantarray ? @tips : \@tips;
814  }  }
815    
816    
# Line 638  Line 832 
832    
833    
834  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
835    #  Hash of tip nodes and corresponding distances from root:
836    #
837    #   %tip_distances = newick_tip_distances( $node )
838    #  \%tip_distances = newick_tip_distances( $node )
839    #-------------------------------------------------------------------------------
840    sub newick_tip_distances
841    {
842        my ( $node, $x, $hash ) = @_;
843        my $root = ! $hash;
844        ref( $hash ) eq 'HASH' or $hash = {};
845    
846        $x ||= 0;
847        $x  += newick_x( $node ) || 0;
848    
849        #  Is it a tip?
850    
851        my $n_desc = newick_n_desc( $node );
852        if ( ! $n_desc )
853        {
854            $hash->{ newick_lbl( $node ) } = $x;
855            return $hash;
856        }
857    
858        #  Tree rooted on tip?
859    
860        if ( ( $n_desc == 1 ) && $root && ( newick_lbl( $node ) ) )
861        {
862            $hash->{ newick_lbl( $node ) } = 0;  # Distance to root is zero
863        }
864    
865        foreach ( newick_desc_list( $node ) )
866        {
867            newick_tip_distances( $_, $x, $hash );
868        }
869    
870        wantarray ? %$hash : $hash;
871    }
872    
873    
874    #-------------------------------------------------------------------------------
875  #  Tree max X.  #  Tree max X.
876  #  #
877  #  $xmax = newick_max_X( $node )  #  $xmax = newick_max_X( $node )
# Line 712  Line 946 
946    
947    
948  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
949    #  Tree tip insertion point (with standard node labels):
950    #
951    #  [ $node1, $x1, $node2, $x2, $x ]
952    #           = newick_tip_insertion_point( $tree, $tip )
953    #
954    #  Which means: tip is on a branch of length x that is inserted into the branch
955    #  connecting node1 and node2, at distance x1 from node1 and x2 from node2.
956    #
957    #                x1    +------ n1a (lowest sorting tip of this subtree)
958    #            +--------n1
959    #            |         +------n1b (lowest sorting tip of this subtree)
960    #  tip-------n
961    #        x   |       +------------- n2a (lowest sorting tip of this subtree)
962    #            +------n2
963    #               x2   +-------- n2b (lowest sorting tip of this subtree)
964    #
965    #  The designations of 1 vs 2, and a vs b are chosen such that:
966    #     n1a < n1b, and n2a < n2b, and n1a < n2a
967    #
968    #  Then the statandard description becomes:
969    #
970    #  [ [ $n1a, min(n1b,n2a), max(n1b,n2a) ], x1,
971    #    [ $n2a, min(n2b,n1a), max(n2b,n1a) ], x2,
972    #    x
973    #  ]
974    #
975    #-------------------------------------------------------------------------------
976    sub newick_tip_insertion_point
977    {
978        my ( $tree, $tip ) = @_;
979        $tree && $tip && ref( $tree ) eq 'ARRAY'    or return undef;
980        $tree = copy_newick_tree( $tree )           or return undef;
981        $tree = reroot_newick_to_tip( $tree, $tip ) or return undef;
982        my $node = $tree;
983    
984        my $x  = 0;                        # Distance to node
985        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
986        $node  = $dl->[0];                 # Node adjacent to tip
987        $dl    = newick_desc_ref( $node );
988        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
989        {
990            $node = $dl->[0];
991            $x   += newick_x( $node );
992            $dl   = newick_desc_ref( $node );
993        }
994        $x += newick_x( $node );
995    
996        #  We are now at the node that is the insertion point.
997        #  Is it a tip?
998    
999        my @description;
1000    
1001        if ( ( ! $dl ) || @$dl == 0 )
1002        {
1003            @description = ( [ newick_lbl( $node ) ], 0, undef, 0, $x );
1004        }
1005    
1006        #  Is it a trifurcation or greater, in which case it does not go
1007        #  away with tip deletion?
1008    
1009        elsif ( @$dl > 2 )
1010        {
1011            @description = ( [ std_node_name( $node, $node ) ], 0, undef, 0, $x );
1012        }
1013    
1014        #  The node is bifurcating.  We need to describe it.
1015    
1016        else
1017        {
1018            my ( $n1, $x1 ) = describe_descendant( $dl->[0] );
1019            my ( $n2, $x2 ) = describe_descendant( $dl->[1] );
1020    
1021            if ( @$n1 == 2 ) { push @$n1, $n2->[0] }
1022            if ( @$n2 == 2 )
1023            {
1024                @$n2 = sort { lc $a cmp lc $b } ( @$n2, $n1->[0] );
1025            }
1026            if ( @$n1 == 3 ) { @$n2 = sort { lc $a cmp lc $b } @$n2 }
1027            @description = ( $n1, $x1, $n2, $x2, $x );
1028        }
1029    
1030        return wantarray ? @description : \@description;
1031    }
1032    
1033    
1034    sub describe_descendant
1035    {
1036        my $node = shift;
1037    
1038        my $x  = 0;                        # Distance to node
1039        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
1040        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
1041        {
1042            $node = $dl->[0];
1043            $x   += newick_x( $node );
1044            $dl   = newick_desc_ref( $node );
1045        }
1046        $x += newick_x( $node );
1047    
1048        #  Is it a tip?  Return list of one tip;
1049    
1050        if ( ( ! $dl ) || @$dl == 0 )
1051        {
1052            return ( [ newick_lbl( $node ) ], $x );
1053        }
1054    
1055        #  Get tips of each descendent, keeping lowest sorting from each.
1056        #  Return the two lowest of those (the third will come from the
1057        #  other side of the original node).
1058    
1059        my @rep_tips = sort { lc $a cmp lc $b }
1060                       map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }
1061                       @$dl;
1062        return ( [ @rep_tips[0,1] ], $x );
1063    }
1064    
1065    
1066    #-------------------------------------------------------------------------------
1067  #  Standard node name:  #  Standard node name:
1068  #     Tip label if at a tip  #     Tip label if at a tip
1069  #     Three sorted tip labels intersecting at node, each being smallest  #     Three sorted tip labels intersecting at node, each being smallest
1070  #           of all the tips of their subtrees  #           of all the tips of their subtrees
1071  #  #
1072  #  @TipOrTips = std_node_name( $Tree, $Node )  #  @TipOrTips = std_node_name( $tree, $node )
1073  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1074  sub std_node_name {  sub std_node_name {
1075      my $tree = $_[0];      my $tree = $_[0];
# Line 734  Line 1086 
1086      #  Work through lists of tips in descendant subtrees, removing them from      #  Work through lists of tips in descendant subtrees, removing them from
1087      #  @rest, and keeping the best tip for each subtree.      #  @rest, and keeping the best tip for each subtree.
1088    
1089      my @rest = tips_in_newick( $tree );      my @rest = newick_tip_list( $tree );
1090      my @best = map {      my @best = map
1091              my @tips = sort { lc $a cmp lc $b } tips_in_newick( $_ );            {
1092              @rest = set_difference( \@rest, \@tips );              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );
1093                @rest = &set_difference( \@rest, \@tips );
1094              $tips[0];              $tips[0];
1095          } newick_desc_list( $noderef );          } newick_desc_list( $noderef );
1096    
# Line 825  Line 1178 
1178      my $imax = newick_n_desc( $node );      my $imax = newick_n_desc( $node );
1179      for ( my $i = 1; $i <= $imax; $i++ ) {      for ( my $i = 1; $i <= $imax; $i++ ) {
1180         @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );         @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );
1181         if ( @path ) { return @path }          return @path if @path;
1182      }      }
1183    
1184      ();  #  Not found      ();  #  Not found
# Line 856  Line 1209 
1209      @p2 && @p3 || return ();                             #  Were they found?      @p2 && @p3 || return ();                             #  Were they found?
1210    
1211      # Find the common prefix for each pair of paths      # Find the common prefix for each pair of paths
1212      my @p12 = common_prefix( \@p1, \@p2 );      my @p12 = &common_prefix( \@p1, \@p2 );
1213      my @p13 = common_prefix( \@p1, \@p3 );      my @p13 = &common_prefix( \@p1, \@p3 );
1214      my @p23 = common_prefix( \@p2, \@p3 );      my @p23 = &common_prefix( \@p2, \@p3 );
1215    
1216      # Return the longest common prefix of any two paths      # Return the longest common prefix of any two paths
1217      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :
# Line 909  Line 1262 
1262      @p1 && @p2 || return undef;                          # Were they found?      @p1 && @p2 || return undef;                          # Were they found?
1263    
1264      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1265      my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1266      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1267      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1268    
# Line 930  Line 1283 
1283    
1284      array_ref( $node ) && defined( $node1 )      array_ref( $node ) && defined( $node1 )
1285                         && defined( $node2 ) || return undef;                         && defined( $node2 ) || return undef;
1286      my @p1 = path_to_node( $node, $node1 );      my @p1 = path_to_node( $node, $node1 ) or return undef;
1287      my @p2 = path_to_node( $node, $node2 );      my @p2 = path_to_node( $node, $node2 ) or return undef;
     @p1 && @p2 || return undef;                          # Were they found?  
1288    
1289      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1290      my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1291      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1292      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1293    
# Line 1150  Line 1502 
1502      my ( $node, $x, $not_root ) = @_;      my ( $node, $x, $not_root ) = @_;
1503    
1504      my $n = 0;      my $n = 0;
1505      if ( $not_root ) {      if ( $not_root )
1506        {
1507          set_newick_x( $node, $x );          set_newick_x( $node, $x );
1508          $n++;          $n++;
1509      }      }
1510    
1511      foreach ( newick_desc_list( $node ) ) {      foreach ( newick_desc_list( $node ) )
1512        {
1513          $n += newick_set_all_branches( $_, $x, 1 );          $n += newick_set_all_branches( $_, $x, 1 );
1514      }      }
1515    
# Line 1164  Line 1518 
1518    
1519    
1520  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1521    #  Rescale all branch lenghts by factor.
1522    #
1523    #  $node = newick_rescale_branches( $node, $factor )
1524    #-------------------------------------------------------------------------------
1525    sub newick_rescale_branches {
1526        my ( $node, $factor ) = @_;
1527    
1528        my $x = newick_x( $node );
1529        set_newick_x( $node, $factor * $x ) if $x;
1530    
1531        foreach ( newick_desc_list( $node ) )
1532        {
1533            newick_rescale_branches( $_, $factor );
1534        }
1535    
1536        $node;
1537    }
1538    
1539    
1540    #-------------------------------------------------------------------------------
1541    #  Modify all branch lengths by a function.
1542    #
1543    #     $node = newick_modify_branches( $node, \&function )
1544    #     $node = newick_modify_branches( $node, \&function, \@func_parms )
1545    #
1546    #  Function must have form
1547    #
1548    #     $x2 = &$function( $x1 )
1549    #     $x2 = &$function( $x1, @$func_parms )
1550    #
1551    #-------------------------------------------------------------------------------
1552    sub newick_modify_branches {
1553        my ( $node, $func, $parm ) = @_;
1554    
1555        set_newick_x( $node, &$func( newick_x( $node ), ( $parm ? @$parm : () ) ) );
1556        foreach ( newick_desc_list( $node ) )
1557        {
1558            newick_modify_branches( $_, $func, $parm )
1559        }
1560    
1561        $node;
1562    }
1563    
1564    
1565    #-------------------------------------------------------------------------------
1566  #  Set negative branches to zero.  The original tree is modfied.  #  Set negative branches to zero.  The original tree is modfied.
1567  #  #
1568  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
# Line 1189  Line 1588 
1588    
1589    
1590  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1591    #  Remove comments from a newick tree (e.g., before writing for phylip).
1592    #
1593    #  $node = newick_strip_comments( $node )
1594    #-------------------------------------------------------------------------------
1595    sub newick_strip_comments {
1596        my ( $node ) = @_;
1597    
1598        @$node = @$node[ 0 .. 2 ];
1599        foreach ( newick_desc_list( $node ) ) { newick_strip_comments( $_ ) }
1600        $node;
1601    }
1602    
1603    
1604    #-------------------------------------------------------------------------------
1605  #  Normalize tree order (in place).  #  Normalize tree order (in place).
1606  #  #
1607  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )
# Line 1336  Line 1749 
1749  #  #
1750  #  $tree = unaesthetic_newick_tree( $treeref, $dir )  #  $tree = unaesthetic_newick_tree( $treeref, $dir )
1751  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1752  sub unaesthetic_newick_tree {  sub unaesthetic_newick_tree
1753    {
1754      my ( $tree, $dir ) = @_;      my ( $tree, $dir ) = @_;
1755      my %cnt;      my %cnt;
1756    
# Line 1356  Line 1770 
1770  #           = 0 for no change, and  #           = 0 for no change, and
1771  #           > 0 for downward branch (small group first).  #           > 0 for downward branch (small group first).
1772  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1773  sub reorder_against_tip_count {  sub reorder_against_tip_count
1774    {
1775      my ( $node, $cntref, $dir ) = @_;      my ( $node, $cntref, $dir ) = @_;
1776    
1777      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1395  Line 1810 
1810  #  #
1811  #  $tree = random_order_newick_tree( $tree )  #  $tree = random_order_newick_tree( $tree )
1812  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1813  sub random_order_newick_tree {  sub random_order_newick_tree
1814    {
1815      my ( $node ) = @_;      my ( $node ) = @_;
1816    
1817      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1404  Line 1820 
1820      #  Reorder this subtree:      #  Reorder this subtree:
1821    
1822      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1823      @$dl_ref = random_order( @$dl_ref );      @$dl_ref = &random_order( @$dl_ref );
1824    
1825      #  Reorder descendants:      #  Reorder descendants:
1826    
# Line 1419  Line 1835 
1835  #  #
1836  #  $newtree = reroot_newick_by_path( @path )  #  $newtree = reroot_newick_by_path( @path )
1837  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1838  sub reroot_newick_by_path {  sub reroot_newick_by_path
1839    {
1840      my ( $node1, $path1, @rest ) = @_;      my ( $node1, $path1, @rest ) = @_;
1841      array_ref( $node1 ) || return undef;      #  Always expect a node      array_ref( $node1 ) || return undef;      #  Always expect a node
1842    
# Line 1432  Line 1849 
1849      #      #
1850      #      splice( @$dl1, $path1-1, 1 );      #      splice( @$dl1, $path1-1, 1 );
1851      #      #
1852      #  But this maintains the cyclic order of the nodes:      #  But the following maintains the cyclic order of the nodes:
1853    
1854      my $dl1 = newick_desc_ref( $node1 );      my $dl1 = newick_desc_ref( $node1 );
1855      my $nd1 = @$dl1;      my $nd1 = @$dl1;
# Line 1447  Line 1864 
1864    
1865      my $dl2 = newick_desc_ref( $node2 );      my $dl2 = newick_desc_ref( $node2 );
1866      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }
1867      else                     { set_newick_desc_list( $node2, [ $node1 ] ) }      else                     { set_newick_desc_list( $node2, $node1 ) }
1868    
1869      #  Move c1 comments from node 1 to node 2:      #  Move c1 comments from node 1 to node 2:
1870    
# Line 1527  Line 1944 
1944    
1945    
1946  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1947  #  Move root of tree to an approximate midpoint.  #  Reroot a newick tree along the path between 2 nodes:
1948  #  #
1949  #  $newtree = reroot_newick_to_approx_midpoint( $tree )  #  $tree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
1950  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1951  sub reroot_newick_to_approx_midpoint {  sub reroot_newick_between_nodes
1952      my ( $tree ) = @_;  {
1953        my ( $tree, $node1, $node2, $fraction ) = @_;
1954      #  Compile average tip to node distances assending      array_ref( $tree ) or return undef;
1955        $fraction >= 0 && $fraction <= 1 or return undef;
     my $dists1 = average_to_tips_1( $tree );  
   
     #  Compile average tip to node distances descending, returning midpoint node  
1956    
1957      my $node = average_to_tips_2( $dists1, undef, undef );      #  Find the paths to the nodes:
1958    
1959      #  Reroot      my @path1 = path_to_node( $tree, $node1 ) or return undef;
1960        my @path2 = path_to_node( $tree, $node2 ) or return undef;
1961    
1962      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree      reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction )
1963  }  }
1964    
1965    
1966  sub average_to_tips_1 {  #-------------------------------------------------------------------------------
1967      my ( $node ) = @_;  #  Reroot a newick tree along the path between 2 nodes:
1968    #
1969      my @desc_dists = map { average_to_tips_1( $_ ) } newick_desc_list( $node );  #  $tree = reroot_newick_between_node_refs( $tree, $node1, $node2, $fraction )
1970      my $x_below = 0;  #-------------------------------------------------------------------------------
1971      if ( @desc_dists )  sub reroot_newick_between_node_refs
1972      {      {
1973          foreach ( @desc_dists ) { $x_below += $_->[0] }      my ( $tree, $node1, $node2, $fraction ) = @_;
1974          $x_below /= @desc_dists;      array_ref( $tree ) or return undef;
     }  
     my $x = newick_x( $node ) || 0;  
     my $x_net = $x_below + $x;  
1975    
1976      [ $x_net, $x, $x_below, [ @desc_dists ], $node ]      #  Find the paths to the nodes:
1977    
1978        my @path1 = path_to_node_ref( $tree, $node1 ) or return undef;
1979        my @path2 = path_to_node_ref( $tree, $node2 ) or return undef;;
1980    
1981        reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction )
1982  }  }
1983    
1984    
1985  sub average_to_tips_2 {  #-------------------------------------------------------------------------------
1986      my ( $dists1, $x_above, $anc_node ) = @_;  #  Reroot a newick tree along the path between 2 nodes defined by paths:
1987      my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;  #
1988    #  $tree = reroot_newick_between_nodes_by_path( $tree, $path1, $path2, $fraction )
1989    #-------------------------------------------------------------------------------
1990    sub reroot_newick_between_nodes_by_path
1991    {
1992        my ( $tree, $path1, $path2, $fraction ) = @_;
1993        array_ref( $tree ) and array_ref( $path1 ) and  array_ref( $path2 )
1994           or return undef;
1995        $fraction >= 0 && $fraction <= 1 or return undef;
1996    
1997      #  Are we done?  Root is in this node's branch, or "above"?      my @path1 = @$path1;
1998        my @path2 = @$path2;
1999    
2000      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      #  Trim the common prefix, saving it:
     # print STDERR "x       = $x\n";  
     # print STDERR "x_below = $x_below\n";  
     # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";  
2001    
2002      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      my @prefix = ();
2003        while ( defined( $path1[1] ) && defined( $path2[1] ) && ( $path1[1] == $path2[1] ) )
2004      {      {
2005          #  At this point the root can only be in this node's branch,          push @prefix, splice( @path1, 0, 2 );
2006          #  or "above" it in the current rooting of the tree (which          splice( @path2, 0, 2 );
2007          #  would mean that the midpoint is actually down a different      }
         #  path from the root of the current tree).  
         #  
         #  Is the root in the current branch?  
2008    
2009          if ( ( $x_below + $x ) >= $x_above )      my ( @path, $dist );
2010        if    ( @path1 < 3 )
2011        {
2012            @path2 >= 3 or return undef;              # node1 = node2
2013            $dist = $fraction * newick_path_length( @path2 );
2014            @path = @path2;
2015        }
2016        elsif ( @path2 < 3 )
2017          {          {
2018              return ( $x_above >= $x_below ) ? $anc_node : $node;          $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
2019            @path = @path1;
2020          }          }
2021          else          else
2022          {          {
2023              return undef;          my $dist1 = newick_path_length( @path1 );
2024            my $dist2 = newick_path_length( @path2 );
2025            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2026            @path = ( $dist <= 0 ) ? @path1 : @path2;
2027            $dist = abs( $dist );
2028          }          }
2029    
2030        #  Descend tree until we reach the insertion branch:
2031    
2032        my $x;
2033        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
2034        {
2035            $dist -= $x;
2036            push @prefix, splice( @path, 0, 2 );
2037      }      }
2038    
2039      #  The root must be somewhere below this node:      #  Insert the new node:
2040    
2041        my $newnode = [ [ $path[2] ], undef, $dist ];
2042        set_newick_desc_i( $path[0], $path[1], $newnode );
2043        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
2044    
2045        #  We can now build the path from root to the new node
2046    
2047        reroot_newick_by_path( @prefix, @path[0,1], $newnode );
2048    }
2049    
2050    
2051    #-------------------------------------------------------------------------------
2052    #  Move root of tree to an approximate midpoint.
2053    #
2054    #  $newtree = reroot_newick_to_approx_midpoint( $tree )
2055    #-------------------------------------------------------------------------------
2056    sub reroot_newick_to_approx_midpoint {
2057        my ( $tree ) = @_;
2058    
2059        #  Compile average tip to node distances assending
2060    
2061        my $dists1 = average_to_tips_1( $tree );
2062    
2063        #  Compile average tip to node distances descending, returning midpoint
2064        #  cadidates as a list of [ $node1, $node2, $fraction ]
2065    
2066        my @mids = average_to_tips_2( $dists1, undef, undef );
2067    
2068        #  Reroot to first midpoint candidate
2069    
2070        return $tree if ! @mids;
2071        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2072        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2073    }
2074    
2075    
2076    #-------------------------------------------------------------------------------
2077    #  Move root of tree to a midpoint.
2078    #
2079    #  $newtree = reroot_newick_to_midpoint( $tree )
2080    #-------------------------------------------------------------------------------
2081    sub reroot_newick_to_midpoint {
2082        my ( $tree ) = @_;
2083    
2084        #  Compile average tip to node distances assending
2085    
2086        my $dists1 = average_to_tips_1( $tree );
2087    
2088        #  Compile average tip to node distances descending, returning midpoint
2089        #  [ $node1, $node2, $fraction ]
2090    
2091        my @mids = average_to_tips_2( $dists1, undef, undef );
2092    
2093        @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2094    }
2095    
2096    
2097    #-------------------------------------------------------------------------------
2098    #  Compile average tip to node distances assending
2099    #-------------------------------------------------------------------------------
2100    sub average_to_tips_1 {
2101        my ( $node ) = @_;
2102    
2103        my @desc_dists = map { average_to_tips_1( $_ ) } newick_desc_list( $node );
2104        my $x_below = 0;
2105        if ( @desc_dists )
2106        {
2107            foreach ( @desc_dists ) { $x_below += $_->[0] }
2108            $x_below /= @desc_dists;
2109        }
2110    
2111        my $x = newick_x( $node ) || 0;
2112        my $x_net = $x_below + $x;
2113    
2114        [ $x_net, $x, $x_below, [ @desc_dists ], $node ]
2115    }
2116    
2117    
2118    #-------------------------------------------------------------------------------
2119    #  Compile average tip to node distances descending, returning midpoint as
2120    #  [ $node1, $node2, $fraction_of_dist_between ]
2121    #-------------------------------------------------------------------------------
2122    sub average_to_tips_2 {
2123        my ( $dists1, $x_above, $anc_node ) = @_;
2124        my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;
2125    
2126        #  Are we done?  Root is in this node's branch, or "above"?
2127    
2128        my @mids = ();
2129        if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2130        {
2131            #  At this point the root can only be in this node's branch,
2132            #  or "above" it in the current rooting of the tree (which
2133            #  would mean that the midpoint is actually down a different
2134            #  path from the root of the current tree).
2135            #
2136            #  Is the root in the current branch?
2137    
2138            if ( ( $x_below + $x ) >= $x_above )
2139            {
2140                #  We will need to make a new node for the root, $fract of
2141                #  the way from $node to $anc_node:
2142                my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2143                                       : 0.5;
2144                push @mids, [ $node, $anc_node, $fract ];
2145            }
2146        }
2147    
2148        #  The root might be somewhere below this node:
2149    
2150      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );
2151      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );
# Line 1605  Line 2155 
2155          #  If input tree is tip_rooted, $n-1 can be 0, so:          #  If input tree is tip_rooted, $n-1 can be 0, so:
2156    
2157          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;
2158          my $root = average_to_tips_2( $_, $above2, $node );          push @mids, average_to_tips_2( $_, $above2, $node );
         if ( $root ) { return $root }  
2159      }      }
2160    
2161      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2162  }  }
2163    
2164    
# Line 1623  Line 2170 
2170  sub reroot_newick_to_approx_midpoint_w {  sub reroot_newick_to_approx_midpoint_w {
2171      my ( $tree ) = @_;      my ( $tree ) = @_;
2172    
2173        #  Compile average tip to node distances assending from tips
2174    
2175        my $dists1 = average_to_tips_1_w( $tree );
2176    
2177        #  Compile average tip to node distances descending, returning midpoints
2178    
2179        my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2180    
2181        #  Reroot to first midpoint candidate
2182    
2183        return $tree if ! @mids;
2184        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2185        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2186    }
2187    
2188    
2189    #-------------------------------------------------------------------------------
2190    #  Move root of tree to an approximate midpoint.  Weight by tips.
2191    #
2192    #  $newtree = reroot_newick_to_midpoint_w( $tree )
2193    #-------------------------------------------------------------------------------
2194    sub reroot_newick_to_midpoint_w {
2195        my ( $tree ) = @_;
2196    
2197      #  Compile average tip to node distances assending      #  Compile average tip to node distances assending
2198    
2199      my $dists1 = average_to_tips_1_w( $tree );      my $dists1 = average_to_tips_1_w( $tree );
2200    
2201      #  Compile average tip to node distances descending, returning midpoint node      #  Compile average tip to node distances descending, returning midpoint node
2202    
2203      my $node = average_to_tips_2_w( $dists1, undef, undef, undef );      my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2204    
2205      #  Reroot      #  Reroot at first candidate midpoint
2206    
2207      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree      @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2208  }  }
2209    
2210    
# Line 1654  Line 2225 
2225          }          }
2226          $x_below /= $n_below;          $x_below /= $n_below;
2227      }      }
2228    
2229      my $x = newick_x( $node ) || 0;      my $x = newick_x( $node ) || 0;
2230      my $x_net = $x_below + $x;      my $x_net = $x_below + $x;
2231    
# Line 1667  Line 2239 
2239    
2240      #  Are we done?  Root is in this node's branch, or "above"?      #  Are we done?  Root is in this node's branch, or "above"?
2241    
2242      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      my @mids = ();
     # print STDERR "x       = $x\n";  
     # print STDERR "x_below = $x_below\n";  
     # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";  
   
2243      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2244      {      {
2245          #  At this point the root can only be in this node's branch,          #  At this point the root can only be in this node's branch,
# Line 1679  Line 2247 
2247          #  would mean that the midpoint is actually down a different          #  would mean that the midpoint is actually down a different
2248          #  path from the root of the current tree).          #  path from the root of the current tree).
2249          #          #
2250          #  Is the root in the current branch?          #  Is their a root in the current branch?
2251    
2252          if ( ( $x_below + $x ) >= $x_above )          if ( ( $x_below + $x ) >= $x_above )
2253          {          {
2254              return ( $x_above >= $x_below ) ? $anc_node : $node;              #  We will need to make a new node for the root, $fract of
2255          }              #  the way from $node to $anc_node:
2256          else              my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2257          {                                     : 0.5;
2258              return undef;              push @mids, [ $node, $anc_node, $fract ];
2259          }          }
2260      }      }
2261    
# Line 1707  Line 2275 
2275    
2276          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )
2277                                   : 0;                                   : 0;
2278          my $root = average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );          push @mids, average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );
         if ( $root ) { return $root }  
2279      }      }
2280    
2281      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2282  }  }
2283    
2284    
# Line 1859  Line 2424 
2424      ( $tree, ( $collapse ? @new_desc : () ) );      ( $tree, ( $collapse ? @new_desc : () ) );
2425  }  }
2426    
2427    #-------------------------------------------------------------------------------
2428    #  Add a subtree to a newick tree node:
2429    #
2430    #  $node = newick_insert_at_node( $node, $subtree )
2431    #-------------------------------------------------------------------------------
2432    sub newick_insert_at_node
2433    {
2434        my ( $node, $subtree ) = @_;
2435        array_ref( $node ) && array_ref( $subtree ) or return undef;
2436    
2437        #  We could check validity of trees, but ....
2438    
2439        my $dl = newick_desc_ref( $node );
2440        if ( array_ref( $dl ) )
2441        {
2442            push @$dl, $subtree;
2443        }
2444        else
2445        {
2446            set_newick_desc_ref( $node, [ $subtree ] );
2447        }
2448        return $node;
2449    }
2450    
2451    
2452    #-------------------------------------------------------------------------------
2453    #  Insert a subtree into a newick tree along the path between 2 nodes:
2454    #
2455    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
2456    #-------------------------------------------------------------------------------
2457    sub newick_insert_between_nodes
2458    {
2459        my ( $tree, $subtree, $node1, $node2, $fraction ) = @_;
2460        array_ref( $tree ) && array_ref( $subtree ) or return undef;
2461        $fraction >= 0 && $fraction <= 1 or return undef;
2462    
2463        #  Find the paths to the nodes:
2464    
2465        my @path1 = path_to_node( $tree, $node1 ) or return undef;
2466        my @path2 = path_to_node( $tree, $node2 ) or return undef;
2467    
2468        #  Trim the common prefix:
2469    
2470        while ( $path1[1] == $path2[1] )
2471        {
2472            splice( @path1, 0, 2 );
2473            splice( @path2, 0, 2 );
2474        }
2475    
2476        my ( @path, $dist );
2477        if    ( @path1 < 3 )
2478        {
2479            @path2 >= 3 or return undef;              # node1 = node2
2480            $dist = $fraction * newick_path_length( @path2 );
2481            @path = @path2;
2482        }
2483        elsif ( @path2 < 3 )
2484        {
2485            $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
2486            @path = @path1;
2487        }
2488        else
2489        {
2490            my $dist1 = newick_path_length( @path1 );
2491            my $dist2 = newick_path_length( @path2 );
2492            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2493            @path = ( $dist <= 0 ) ? @path1 : @path2;
2494            $dist = abs( $dist );
2495        }
2496    
2497        #  Descend tree until we reach the insertion branch:
2498    
2499        my $x;
2500        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
2501        {
2502            $dist -= $x;
2503            splice( @path, 0, 2 );
2504        }
2505    
2506        #  Insert the new node:
2507    
2508        set_newick_desc_i( $path[0], $path[1], [ [ $path[2], $subtree ], undef, $dist ] );
2509        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
2510    
2511        return $tree;
2512    }
2513    
2514    
2515  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2516  #  Prune one or more tips from a tree:  #  Prune one or more tips from a tree:
# Line 1941  Line 2593 
2593    
2594    
2595  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2596    #  Produce a potentially rooted subtree with the desired tips:
2597    #
2598    #     Except for (some) tip nodes, the tree produced is a copy.
2599    #     There is no check that requested tips exist.
2600    #
2601    #  $newtree = rooted_newick_subtree( $tree,  @tips )
2602    #  $newtree = rooted_newick_subtree( $tree, \@tips )
2603    #-------------------------------------------------------------------------------
2604    sub rooted_newick_subtree {
2605        my ( $tr, @tips ) = @_;
2606        if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }
2607    
2608        if ( @tips < 2 ) { return undef }
2609        my $keephash = { map { ( $_, 1 ) } @tips };
2610        my $tr2 = subtree1( $tr, $keephash );
2611        $tr2->[2] = undef if $tr2;                   # undef root branch length
2612        $tr2;
2613    }
2614    
2615    
2616    #-------------------------------------------------------------------------------
2617  #  Produce a subtree with the desired tips:  #  Produce a subtree with the desired tips:
2618  #  #
2619  #     Except for (some) tip nodes, the tree produced is a copy.  #     Except for (some) tip nodes, the tree produced is a copy.
# Line 2014  Line 2687 
2687  }  }
2688    
2689    
2690    #-------------------------------------------------------------------------------
2691    #  The smallest subtree of rooted tree that includes @tips:
2692    #
2693    #    $node = newick_covering_subtree( $tree,  @tips )
2694    #    $node = newick_covering_subtree( $tree, \@tips )
2695    #-------------------------------------------------------------------------------
2696    
2697    sub newick_covering_subtree {
2698        my $tree = shift;
2699        my %tips = map { $_ => 1 } ( ( ref( $_[0] ) eq 'ARRAY' ) ? @{ $_[0] } : @_ );
2700    
2701        #  Return smallest covering node, if any:
2702    
2703        ( newick_covering_subtree( $tree, \%tips ) )[ 0 ];
2704    }
2705    
2706    
2707    sub newick_covering_subtree_1 {
2708        my ( $node, $tips ) = @_;
2709        my $n_cover = 0;
2710        my @desc = newick_desc_list( $node );
2711        if ( @desc )
2712        {
2713            foreach ( @desc )
2714            {
2715                my ( $subtree, $n ) = newick_covering_subtree_1( $_, $tips );
2716                return ( $subtree, $n ) if $subtree;
2717                $n_cover += $n;
2718            }
2719        }
2720        elsif ( $tips->{ newick_lbl( $node ) } )
2721        {
2722            $n_cover++;
2723        }
2724    
2725        #  If all tips are covered, return node
2726    
2727        ( $n_cover == keys %$tips ) ? ( $node, $n_cover ) : ( undef, $n_cover );
2728    }
2729    
2730    
2731    #===============================================================================
2732    #
2733    #  Representative subtrees
2734    #
2735    #===============================================================================
2736    #  Find subtree of size n representating vicinity of the root:
2737    #
2738    #   $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
2739    #   $subtree = root_neighborhood_representative_tree( $tree, $n )
2740    #
2741    #  Note that if $tree is rooted, then the subtree will also be.  This can have
2742    #  consequences on downstream programs.
2743    #-------------------------------------------------------------------------------
2744    sub root_neighborhood_representative_tree
2745    {
2746        my ( $tree, $n, $tip_priority ) = @_;
2747        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2748        if ( newick_tip_count( $tree ) <= $n ) { return $tree }
2749    
2750        $tip_priority ||= default_tip_priority( $tree );
2751        my @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2752                   root_proximal_newick_subtrees( $tree, $n );
2753    
2754        newick_subtree( copy_newick_tree( $tree ), \@tips );
2755    }
2756    
2757    
2758    #-------------------------------------------------------------------------------
2759    #  Find n tips to represent tree lineages in vicinity of another tip.
2760    #  Default tip priority is short total branch length.
2761    #
2762    #  \@tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2763    #   @tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2764    #  \@tips = root_neighborhood_representative_tips( $tree, $n )
2765    #   @tips = root_neighborhood_representative_tips( $tree, $n )
2766    #-------------------------------------------------------------------------------
2767    sub root_neighborhood_representative_tips
2768    {
2769        my ( $tree, $n, $tip_priority ) = @_;
2770        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2771    
2772        my @tips;
2773        if ( newick_tip_count( $tree ) <= $n )
2774        {
2775            @tips = newick_tip_list( $tree );
2776        }
2777        else
2778        {
2779            $tip_priority ||= default_tip_priority( $tree );
2780            @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2781                    root_proximal_newick_subtrees( $tree, $n );
2782        }
2783    
2784        wantarray ? @tips : \@tips;
2785    }
2786    
2787    
2788    #-------------------------------------------------------------------------------
2789    #  Find subtree of size n representating vicinity of a tip:
2790    #
2791    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
2792    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
2793    #-------------------------------------------------------------------------------
2794    sub tip_neighborhood_representative_tree
2795    {
2796        my ( $tree, $tip, $n, $tip_priority ) = @_;
2797        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2798        newick_tip_in_tree( $tree, $tip ) or return undef;
2799    
2800        my $tree1 = copy_newick_tree( $tree );
2801        if ( newick_tip_count( $tree1 ) - 1 <= $n )
2802        {
2803            return prune_from_newick( $tree1, $tip )
2804        }
2805    
2806        $tree1 = reroot_newick_to_tip( $tree1, $tip );
2807        $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2808        my @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2809        newick_subtree( copy_newick_tree( $tree ), \@tips );
2810    }
2811    
2812    
2813    #-------------------------------------------------------------------------------
2814    #  Find n tips to represent tree lineages in vicinity of another tip.
2815    #  Default tip priority is short total branch length.
2816    #
2817    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2818    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2819    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2820    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2821    #-------------------------------------------------------------------------------
2822    sub tip_neighborhood_representative_tips
2823    {
2824        my ( $tree, $tip, $n, $tip_priority ) = @_;
2825        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2826        newick_tip_in_tree( $tree, $tip ) or return undef;
2827    
2828        my @tips = newick_tip_list( $tree );
2829        if ( newick_tip_count( $tree ) - 1 <= $n )
2830        {
2831            @tips = grep { $_ ne $tip } @tips;
2832        }
2833        else
2834        {
2835            my $tree1 = copy_newick_tree( $tree );
2836            $tree1 = reroot_newick_to_tip( $tree1, $tip );
2837            $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2838            @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2839        }
2840    
2841        wantarray ? @tips : \@tips;
2842    }
2843    
2844    
2845    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2846    #  Anonymous hash of the negative distance from root to each tip:
2847    #
2848    #   \%tip_priority = default_tip_priority( $tree )
2849    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2850    sub default_tip_priority
2851    {
2852        my ( $tree ) = @_;
2853        my $tip_distances = newick_tip_distances( $tree ) || {};
2854        return { map { $_ => -$tip_distances->{$_} } keys %$tip_distances };
2855    }
2856    
2857    
2858    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2859    #  Select a tip from a subtree base on a priority value:
2860    #
2861    #    $tip = representative_tip_of_newick_node( $node, \%tip_priority )
2862    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2863    sub representative_tip_of_newick_node
2864    {
2865        my ( $node, $tip_priority ) = @_;
2866        my ( $tip ) = sort { $b->[1] <=> $a->[1] }   # The best
2867                      map  { [ $_, $tip_priority->{ $_ } ] }
2868                      newick_tip_list( $node );
2869        $tip->[0];                                   # Label from label-priority pair
2870    }
2871    
2872    
2873    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2874    #  Find n subtrees focused around the root of a tree.  Typically each will
2875    #  then be reduced to a single tip to make a representative tree:
2876    #
2877    #   @subtrees = root_proximal_newick_subtrees( $tree, $n )
2878    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2879    sub root_proximal_newick_subtrees
2880    {
2881        my ( $tree, $n ) = @_;
2882        my $node_start_end = newick_branch_intervals( $tree );
2883        n_representative_branches( $n, $node_start_end );
2884    }
2885    
2886    
2887    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2888    #   @node_start_end = newick_branch_intervals( $tree )
2889    #  \@node_start_end = newick_branch_intervals( $tree )
2890    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2891    sub newick_branch_intervals
2892    {
2893        my ( $node, $parent_x ) = @_;
2894        $parent_x ||= 0;
2895        my ( $desc, undef, $dx ) = @$node;
2896        my $x = $parent_x + $dx;
2897        my $interval = [ $node, $parent_x, $desc && @$desc ? $x : 1e100 ];
2898        my @intervals = ( $interval,
2899                          map { &newick_branch_intervals( $_, $x ) } @$desc
2900                        );
2901        return wantarray ? @intervals : \@intervals;
2902    }
2903    
2904    
2905    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2906    #   @ids = n_representative_branches( $n,  @id_start_end )
2907    #   @ids = n_representative_branches( $n, \@id_start_end )
2908    #  \@ids = n_representative_branches( $n,  @id_start_end )
2909    #  \@ids = n_representative_branches( $n, \@id_start_end )
2910    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2911    sub n_representative_branches
2912    {
2913        my $n = shift;
2914        #  Sort intervals by start point:
2915        my @unprocessed = sort { $a->[1] <=> $b->[1] }
2916                          ( @_ == 1 ) ? @{ $_[0] } : @_;
2917        my @active = ();
2918        my ( $interval, $current_point );
2919        foreach $interval ( @unprocessed )
2920        {
2921            $current_point = $interval->[1];
2922            #  Filter out intervals that have ended.  This is N**2 in the number
2923            #  of representatives.  Fixing this would require maintaining a sorted
2924            #  active list.
2925            @active = grep { $_->[2] > $current_point } @active;
2926            push @active, $interval;
2927            last if ( @active >= $n );
2928        }
2929    
2930        my @ids = map { $_->[0] } @active;
2931        return wantarray() ? @ids : \@ids;
2932    }
2933    
2934    
2935  #===============================================================================  #===============================================================================
2936  #  #
2937  #  Tree writing and reading  #  Tree writing and reading
2938  #  #
2939  #===============================================================================  #===============================================================================
2940  #  writeNewickTree( $tree [, $file ] )  #  writeNewickTree( $tree )
2941    #  writeNewickTree( $tree, $file )
2942    #  writeNewickTree( $tree, \*FH )
2943  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2944  sub writeNewickTree {  sub writeNewickTree {
2945      my ( $tree, $file ) = @_;      my ( $tree, $file ) = @_;
2946      $file || ( $file = \*STDOUT );      my ( $fh, $close ) = open_output( $file );
2947      print  $file  ( strNewickTree( $tree ), "\n" );      $fh or return;
2948        print  $fh  ( strNewickTree( $tree ), "\n" );
2949        close $fh if $close;
2950  }  }
2951    
2952    
# Line 2167  Line 3089 
3089    
3090    
3091  #===============================================================================  #===============================================================================
3092    #  $tree  = read_newick_tree( $file )  # reads to a semicolon
3093    #  @trees = read_newick_trees( $file ) # reads to end of file
3094    #===============================================================================
3095    
3096    sub read_newick_tree
3097    {
3098        my $file = shift;
3099        my ( $fh, $close ) = open_input( $file );
3100        my $tree;
3101        my @lines = ();
3102        foreach ( <$fh> )
3103        {
3104            chomp;
3105            push @lines, $_;
3106            if ( /;/ )
3107            {
3108                $tree = parse_newick_tree_str( join( ' ', @lines ) );
3109                last;
3110            }
3111        }
3112        close $fh if $close;
3113    
3114        $tree;
3115    }
3116    
3117    
3118    sub read_newick_trees
3119    {
3120        my $file = shift;
3121        my ( $fh, $close ) = open_input( $file );
3122        my @trees = ();
3123        my @lines = ();
3124        foreach ( <$fh> )
3125        {
3126            chomp;
3127            push @lines, $_;
3128            if ( /;/ )
3129            {
3130                push @trees, parse_newick_tree_str( join( ' ', @lines ) );
3131                @lines = ()
3132            }
3133        }
3134        close $fh if $close;
3135    
3136        @trees;
3137    }
3138    
3139    
3140    #===============================================================================
3141  #  Tree reader adapted from the C language reader in fastDNAml  #  Tree reader adapted from the C language reader in fastDNAml
3142  #  #
3143  #  $tree = parse_newick_tree_str( $string )  #  $tree = parse_newick_tree_str( $string )
# Line 2337  Line 3308 
3308      #  Loop while it is a comment:      #  Loop while it is a comment:
3309      while ( substr( $s, $ind, 1 ) eq "[" ) {      while ( substr( $s, $ind, 1 ) eq "[" ) {
3310          $ind++;          $ind++;
3311            my $depth = 1;
3312            my $ind2  = $ind;
3313    
3314          #  Find end          #  Find end
3315          if ( substr( $s, $ind ) !~ /^([^]]*)\]/ ) {          while ( $depth > 0 )
3316            {
3317                if ( substr( $s, $ind2 ) =~ /^([^][]*\[)/ )     # nested [ ... ]
3318                {
3319                    $ind2 += length( $1 );  #  Points at char just past [
3320                    $depth++;               #  If nested comments are allowed
3321                }
3322                elsif ( substr( $s, $ind2 ) =~ /^([^][]*\])/ )  # close bracket
3323                {
3324                    $ind2 += length( $1 );  #  Points at char just past ]
3325                    $depth--;
3326                }
3327                else
3328                {
3329              treeParseError( "comment missing closing bracket '["              treeParseError( "comment missing closing bracket '["
3330                             . substr( $s, $ind ) . "'" )                             . substr( $s, $ind ) . "'" )
3331          }          }
3332          my $comment = $1;          }
3333    
3334          #  Save if it includes any "text"          my $comment = substr( $s, $ind, $ind2-$ind-1 );
3335          if ( $comment =~ m/\S/ ) { push @clist, $comment }          if ( $comment =~ m/\S/ ) { push @clist, $comment }
3336    
3337          $ind += length( $comment ) + 1;     #  Comment plus closing bracket          $ind = $ind2;
3338    
3339          #  Skip white space          #  Skip white space
3340          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }
# Line 2367  Line 3353 
3353  #===============================================================================  #===============================================================================
3354  #  Make a printer plot of a tree:  #  Make a printer plot of a tree:
3355  #  #
3356  #     $node   newick tree root node  #  printer_plot_newick( $node, $file, $width, $min_dx, $dy )
3357  #     $file   undef (= \*STDOUT), \*STDOUT, \*STDERR, or a file name.  #  printer_plot_newick( $node, $file, \%options )
3358  #     $width  the approximate characters for the tree without labels  #
3359  #     $min_dx the minimum horizontal branch length  #     $node   # newick tree root node
3360  #     $dy     the vertical space per taxon  #     $file   # undef = \*STDOUT, \*FH, or a file name.
3361  #  #     $width  # the approximate characters for the tree without labels (D = 68)
3362  #  printer_plot_newick( $node, $file (D=\*STDOUT), $width (D=68), $min_dx (D=2), $dy (D=1) )  #     $min_dx # the minimum horizontal branch length (D = 2)
3363  #===============================================================================  #     $dy     # the vertical space per taxon (D = 1, most compressed)
3364  sub printer_plot_newick {  #
3365      my ( $node, $file, $width, $min_dx, $dy ) = @_;  #  Options:
3366    #
3367      my ( $fh, $close );  #    dy     => nat_number    # the vertical space per taxon
3368      if ( ! defined( $file ) ) {  #    chars  => key           # line drawing character set:
3369          $fh = \*STDOUT;  #                            #     html_unicode
3370      }  #                            #     text (default)
3371      elsif ( ref( $file ) eq 'GLOB' ) {  #    min_dx => whole_number  # the minimum horizontal branch length
3372          $fh = $file;  #    width  => whole_number  # approximate tree width without labels
3373      }  #
3374      else {  #===============================================================================
3375          open $fh, ">$file" or die "Could not open $file for writing printer_plot_newick\n";  sub printer_plot_newick
3376          $close = 1;  {
3377      }      my ( $node, $file, @opts ) = @_;
3378    
3379        my ( $fh, $close ) = open_output( $file );
3380        $fh or return;
3381    
3382        my $html = $opts[0] && ref($opts[0]) eq 'HASH'
3383                            && $opts[0]->{ chars }
3384                            && $opts[0]->{ chars } =~ /html/;
3385        print $fh '<PRE>' if $html;
3386        print $fh join( "\n", text_plot_newick( $node, @opts ) ), "\n";
3387        print $fh "</PRE>\n" if $html;
3388    
     print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";  
3389      if ( $close ) { close $fh }      if ( $close ) { close $fh }
3390  }  }
3391    
3392    
3393  #===============================================================================  #===============================================================================
3394    #  Character sets for printer plot trees:
3395    #-------------------------------------------------------------------------------
3396    
3397    my %char_set =
3398      ( text1     => { space  => ' ',
3399                       horiz  => '-',
3400                       vert   => '|',
3401                       el_d_r => '/',
3402                       el_u_r => '\\',
3403                       el_d_l => '\\',
3404                       el_u_l => '/',
3405                       tee_l  => '+',
3406                       tee_r  => '+',
3407                       tee_u  => '+',
3408                       tee_d  => '+',
3409                       half_l => '-',
3410                       half_r => '-',
3411                       half_u => '|',
3412                       half_d => '|',
3413                       cross  => '+',
3414                     },
3415        text2     => { space  => ' ',
3416                       horiz  => '-',
3417                       vert   => '|',
3418                       el_d_r => '+',
3419                       el_u_r => '+',
3420                       el_d_l => '+',
3421                       el_u_l => '+',
3422                       tee_l  => '+',
3423                       tee_r  => '+',
3424                       tee_u  => '+',
3425                       tee_d  => '+',
3426                       half_l => '-',
3427                       half_r => '-',
3428                       half_u => '|',
3429                       half_d => '|',
3430                       cross  => '+',
3431                     },
3432        html_box  => { space  => '&nbsp;',
3433                       horiz  => '&#9472;',
3434                       vert   => '&#9474;',
3435                       el_d_r => '&#9484;',
3436                       el_u_r => '&#9492;',
3437                       el_d_l => '&#9488;',
3438                       el_u_l => '&#9496;',
3439                       tee_l  => '&#9508;',
3440                       tee_r  => '&#9500;',
3441                       tee_u  => '&#9524;',
3442                       tee_d  => '&#9516;',
3443                       half_l => '&#9588;',
3444                       half_r => '&#9590;',
3445                       half_u => '&#9589;',
3446                       half_d => '&#9591;',
3447                       cross  => '&#9532;',
3448                     },
3449        utf8_box  => { space  => ' ',
3450                       horiz  => chr(226) . chr(148) . chr(128),
3451                       vert   => chr(226) . chr(148) . chr(130),
3452                       el_d_r => chr(226) . chr(148) . chr(140),
3453                       el_u_r => chr(226) . chr(148) . chr(148),
3454                       el_d_l => chr(226) . chr(148) . chr(144),
3455                       el_u_l => chr(226) . chr(148) . chr(152),
3456                       tee_l  => chr(226) . chr(148) . chr(164),
3457                       tee_r  => chr(226) . chr(148) . chr(156),
3458                       tee_u  => chr(226) . chr(148) . chr(180),
3459                       tee_d  => chr(226) . chr(148) . chr(172),
3460                       half_l => chr(226) . chr(149) . chr(180),
3461                       half_r => chr(226) . chr(149) . chr(182),
3462                       half_u => chr(226) . chr(149) . chr(181),
3463                       half_d => chr(226) . chr(149) . chr(183),
3464                       cross  => chr(226) . chr(148) . chr(188),
3465                     },
3466      );
3467    
3468    %{ $char_set{ html1 } } = %{ $char_set{ text1 } };
3469    $char_set{ html1 }->{ space } = '&nbsp;';
3470    
3471    %{ $char_set{ html2 } } = %{ $char_set{ text2 } };
3472    $char_set{ html2 }->{ space } = '&nbsp;';
3473    
3474    #  Define some synonyms
3475    
3476    $char_set{ html } = $char_set{ html_box };
3477    $char_set{ line } = $char_set{ utf8_box };
3478    $char_set{ symb } = $char_set{ utf8_box };
3479    $char_set{ text } = $char_set{ text1 };
3480    $char_set{ utf8 } = $char_set{ utf8_box };
3481    
3482    #  Define tree formats and synonyms
3483    
3484    my %tree_format =
3485        ( text         => 'text',
3486          tree_tab_lbl => 'tree_tab_lbl',
3487          tree_lbl     => 'tree_lbl',
3488          chrlist_lbl  => 'chrlist_lbl',
3489          raw          => 'chrlist_lbl',
3490        );
3491    
3492    #===============================================================================
3493  #  Make a text plot of a tree:  #  Make a text plot of a tree:
3494  #  #
3495  #     $node   newick tree root node  #  @lines = text_plot_newick( $node, $width, $min_dx, $dy )
3496  #     $width  the approximate characters for the tree without labels  #  @lines = text_plot_newick( $node, \%options )
3497  #     $min_dx the minimum horizontal branch length  #
3498  #     $dy     the vertical space per taxon  #     $node   # newick tree root node
3499    #     $width  # the approximate characters for the tree without labels (D = 68)
3500    #     $min_dx # the minimum horizontal branch length (D = 2)
3501    #     $dy     # the vertical space per taxon (D = 1, most compressed)
3502    #
3503    #  Options:
3504    #
3505    #    chars  => keyword       # the output character set for the tree
3506    #    dy     => nat_number    # the vertical space per taxon
3507    #    format => keyword       # output format of each line
3508    #    min_dx => whole_number  # the minimum horizontal branch length
3509    #    width  => whole_number  # approximate tree width without labels
3510    #
3511    #  Character sets:
3512    #
3513    #    html       #  synonym of html1
3514    #    html_box   #  html encoding of unicode box drawing characters
3515    #    html1      #  text1 with nonbreaking spaces
3516    #    html2      #  text2 with nonbreaking spaces
3517    #    line       #  synonym of utf8_box
3518    #    raw        #  pass out the internal representation
3519    #    symb       #  synonym of utf8_box
3520    #    text       #  synonym of text1 (Default)
3521    #    text1      #  ascii characters: - + | / \ and space
3522    #    text2      #  ascii characters: - + | + + and space
3523    #    utf8       #  synonym of utf8_box
3524    #    utf8_box   #  utf8 encoding of unicode box drawing characters
3525    #
3526    #  Formats for row lines:
3527    #
3528    #    text           #    $textstring              # Default
3529    #    tree_tab_lbl   #    $treestr \t $labelstr
3530    #    tree_lbl       # [  $treestr,  $labelstr ]
3531    #    chrlist_lbl    # [ \@treechar, $labelstr ]   # Forced with raw chars
3532    #    raw            #  synonym of chrlist_lbl
3533  #  #
 #  @textlines = text_plot_newick( $node, $width (D=68), $min_dx (D=2), $dy (D=1) )  
3534  #===============================================================================  #===============================================================================
3535  sub text_plot_newick {  sub text_plot_newick
3536      my ( $node, $width, $min_dx, $dy ) = @_;  {
3537        my $node = shift @_;
3538      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";
3539      defined( $min_dx ) and ( $min_dx >=  0 ) or $min_dx =  2;  
3540      defined(     $dy ) and (     $dy >=  1 ) or     $dy =  1;      my ( $opts, $width, $min_dx, $dy, $chars, $fmt );
3541      defined( $width  )                       or  $width = 68;      if ( $_[0] && ref $_[0] eq 'HASH' )
3542        {
3543            $opts = shift;
3544        }
3545        else
3546        {
3547            ( $width, $min_dx, $dy ) = @_;
3548            $opts = {};
3549        }
3550    
3551        $chars = $opts->{ chars } || '';
3552        my $charH;
3553        $charH = $char_set{ $chars } || $char_set{ 'text1' } if ( $chars ne 'raw' );
3554        my $is_box = $charH eq $char_set{ html_box }
3555                  || $charH eq $char_set{ utf8_box }
3556                  || $chars eq 'raw';
3557    
3558        $fmt = ( $chars eq 'raw' ) ? 'chrlist_lbl' : $opts->{ format };
3559        $fmt = $tree_format{ $fmt || '' } || 'text';
3560    
3561        $dy    ||= $opts->{ dy     } ||  1;
3562        $width ||= $opts->{ width  } || 68;
3563        $min_dx  = $opts->{ min_dx } if ( ! defined $min_dx || $min_dx < 0 );
3564        $min_dx  = $is_box ? 1 : 2   if ( ! defined $min_dx || $min_dx < 0 );
3565    
3566        #  Layout the tree:
3567    
3568      $min_dx = int( $min_dx );      $min_dx = int( $min_dx );
3569      $dy     = int( $dy );      $dy     = int( $dy );
# Line 2419  Line 3572 
3572      my $hash = {};      my $hash = {};
3573      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );
3574    
3575      # dump_tree_hash( $node, $hash ); exit;      #  Generate the lines of the tree-one by-one:
   
     #  Generate the lines of the tree one by one:  
3576    
3577      my ( $y1, $y2 ) = @{ $hash->{ $node } };      my ( $y1, $y2 ) = @{ $hash->{ $node } };
3578      map { text_tree_row( $node, $hash, $_, "", "+" ) } ( $y1 .. $y2 );      my @lines;
3579        foreach ( ( $y1 .. $y2 ) )
3580        {
3581            my $line = text_tree_row( $node, $hash, $_, [], 'tee_l' );
3582            my $lbl  = '';
3583            if ( @$line )
3584            {
3585                if ( $line->[-1] eq '' ) { pop @$line; $lbl = pop @$line }
3586                #  Translate tree characters
3587                @$line = map { $charH->{ $_ } } @$line if $chars ne 'raw';
3588            }
3589    
3590            # Convert to requested output format:
3591    
3592            push @lines, $fmt eq 'text'         ? join( '', @$line, ( $lbl ? " $lbl" : () ) )
3593                       : $fmt eq 'text_tab_lbl' ? join( '', @$line, "\t", $lbl )
3594                       : $fmt eq 'tree_lbl'     ? [ join( '', @$line ), $lbl ]
3595                       : $fmt eq 'chrlist_lbl'  ? [ $line, $lbl ]
3596                       :                          ();
3597        }
3598    
3599        # if ( $cells )
3600        # {
3601        #     my $nmax = 0;
3602        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
3603        #     foreach ( @lines )
3604        #     {
3605        #         @$_ = map { "<TD>$_</TD>" } @$_;
3606        #         my $span = $nmax - @$_ + 1;
3607        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
3608        #     }
3609        # }
3610        # elsif ( $tables )
3611        # {
3612        #     my $nmax = 0;
3613        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
3614        #     foreach ( @lines )
3615        #     {
3616        #         @$_ = map { "<TD>$_</TD>" } @$_;
3617        #         my $span = $nmax - @$_ + 1;
3618        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
3619        #     }
3620        # }
3621    
3622        wantarray ? @lines : \@lines;
3623  }  }
3624    
3625    
3626  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3627  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy )  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd )
3628  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3629  sub layout_printer_plot {  sub layout_printer_plot
3630      my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy ) = @_;  {
3631        my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd ) = @_;
3632      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";
3633      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";
3634    
3635      my $dx = newick_x( $node );      my $dx = newick_x( $node );
3636      if ( defined( $dx ) ) {      if ( defined( $dx ) ) {
3637          $dx *= $x_scale;          $dx *= $x_scale;
3638          $dx >= $min_dx or $dx = $min_dx;          $dx = $min_dx if $dx < $min_dx;
3639      }      }
3640      else {      else {
3641          $dx = ( $x0 > 0 ) ? $min_dx : 0;          $dx = ( $x0 > 0 ) ? $min_dx : 0;
# Line 2465  Line 3662 
3662          $ymax = $y0;          $ymax = $y0;
3663    
3664          foreach ( @dl ) {          foreach ( @dl ) {
3665              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy );              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy,
3666                                                              ( 2*@ylist < @dl ? 0.5001 : 0.4999 )
3667                                                            );
3668              push @ylist, $yi;              push @ylist, $yi;
3669              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }
3670          }          }
# Line 2475  Line 3674 
3674    
3675          $yn1 = $ylist[ 0];          $yn1 = $ylist[ 0];
3676          $yn2 = $ylist[-1];          $yn2 = $ylist[-1];
3677          $y = int( 0.5 * ( $yn1 + $yn2 ) + 0.4999 );          $y = int( 0.5 * ( $yn1 + $yn2 ) + ( $yrnd || 0.4999 ) );
3678      }      }
3679    
3680      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );
# Line 2485  Line 3684 
3684  }  }
3685    
3686    
3687    #  What symbol do we get if we add a leftward line to some other symbol?
3688    
3689    my %with_left_line = ( space  => 'half_l',
3690                           horiz  => 'horiz',
3691                           vert   => 'tee_l',
3692                           el_d_r => 'tee_d',
3693                           el_u_r => 'tee_u',
3694                           el_d_l => 'el_d_l',
3695                           el_u_l => 'el_u_l',
3696                           tee_l  => 'tee_l',
3697                           tee_r  => 'cross',
3698                           tee_u  => 'tee_u',
3699                           tee_d  => 'tee_d',
3700                           half_l => 'half_l',
3701                           half_r => 'horiz',
3702                           half_u => 'el_u_l',
3703                           half_d => 'el_d_l',
3704                           cross  => 'cross',
3705                         );
3706    
3707    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3708    #  Produce a description of one line of a printer plot tree.
3709    #
3710    #  \@line = text_tree_row( $node, $hash, $row, \@line, $symb )
3711    #
3712    #     \@line is the character descriptions accumulated so far, one per array
3713    #          element, except for a label, which can be any number of characters.
3714    #          Labels are followed by an empty string, so if $line->[-1] eq '',
3715    #          then $line->[-2] is a label. The calling program translates the
3716    #          symbol names to output characters.
3717    #
3718    #     \@node is a newick tree node
3719    #     \%hash contains tree layout information
3720    #      $row  is the row number (y value) that we are building
3721    #      $symb is the plot symbol proposed for the current x and y position
3722    #
3723    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3724    sub text_tree_row
3725    {
3726        my ( $node, $hash, $row, $line, $symb ) = @_;
3727    
3728        my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };
3729        if ( $row < $y1 || $row > $y2 ) { return $line }
3730    
3731        if ( @$line < $x0 ) { push @$line, ('space') x ( $x0 - @$line ) }
3732    
3733        if ( $row == $y ) {
3734            while ( @$line > $x0 ) { pop @$line }  # Actually 0-1 times
3735            push @$line, $symb,
3736                         ( ( $x > $x0 ) ? ('horiz') x ($x - $x0) : () );
3737        }
3738    
3739        elsif ( $row > $yn1 && $row < $yn2 ) {
3740            if ( @$line < $x ) { push @$line, ('space') x ( $x - @$line ), 'vert' }
3741            else               { $line->[$x] = 'vert' }
3742        }
3743    
3744        my @dl = newick_desc_list( $node );
3745    
3746        if ( @dl < 1 ) { push @$line, ( newick_lbl( $node ) || '' ), '' }
3747    
3748        else {
3749            my @list = map { [ $_, 'tee_r' ] } @dl;  # Line to the right
3750            if ( @list > 1 ) { #  Fix top and bottom sympbols
3751                $list[ 0]->[1] = 'el_d_r';
3752                $list[-1]->[1] = 'el_u_r';
3753            }
3754            elsif ( @list ) {  # Only one descendent
3755                $list[ 0]->[1] = 'half_r';
3756            }
3757            foreach ( @list ) {
3758                my ( $n, $s ) = @$_;
3759                if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {
3760                    $line = text_tree_row( $n, $hash, $row, $line, $s );
3761                }
3762             }
3763    
3764            if ( $row == $y ) {
3765                $line->[$x] = ( $line->[$x] eq 'horiz' ) ? 'tee_l'
3766                                                         : $with_left_line{ $line->[$x] };
3767            }
3768        }
3769    
3770        return $line;
3771    }
3772    
3773    
3774  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3775  #  Debug routine  #  Debug routine
3776  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Line 2510  Line 3796 
3796  }  }
3797    
3798    
3799  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #===============================================================================
3800  #  $line = text_tree_row( $node, $hash, $row, $line, $symb )  #  Open an input file stream:
3801  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #
3802  sub text_tree_row {  #     ( $handle, undef ) = open_input(       );  # \*STDIN
3803      my ( $node, $hash, $row, $line, $symb ) = @_;  #     ( $handle, undef ) = open_input( \*FH  );
3804    #     ( $handle, 1     ) = open_input( $file );  # need to close $handle
3805    #
3806    #===============================================================================
3807    sub open_input
3808    {
3809        my $file = shift;
3810        my $fh;
3811        if    ( ! defined( $file ) )     { return ( \*STDIN ) }
3812        elsif ( ref( $file ) eq 'GLOB' ) { return ( $file   ) }
3813        elsif ( open( $fh, "<$file" ) )  { return ( $fh, 1  ) } # Need to close
3814    
3815      my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };      print STDERR "gjonewick::open_input could not open '$file' for reading\n";
3816      if ( $row < $y1 || $row > $y2 ) { return $line }      return undef;
3817    }
3818    
     if ( length( $line ) < $x0 ) { $line .= " " x ( $x0 - length( $line ) ) }  
3819    
3820      if ( $row == $y ) {  #===============================================================================
3821          $line = substr( $line, 0, $x0 ) . $symb . (( $x > $x0 ) ? "-" x ($x - $x0) : "");  #  Open an output file stream:
3822    #
3823    #     ( $handle, undef ) = open_output(      );  # \*STDOUT
3824    #     ( $handle, undef ) = open_output( \*FH );
3825    #     ( $handle, 1     ) = open_output( $file ); # need to close $handle
3826    #
3827    #===============================================================================
3828    sub open_output
3829    {
3830        my $file = shift;
3831        my $fh;
3832        if    ( ! defined( $file ) )     { return ( \*STDOUT ) }
3833        elsif ( ref( $file ) eq 'GLOB' ) { return ( $file    ) }
3834        elsif ( ( open $fh, ">$file" ) ) { return ( $fh, 1   ) } # Need to close
3835    
3836        print STDERR "gjonewick::open_output could not open '$file' for writing\n";
3837        return undef;
3838      }      }
3839    
3840      elsif ( $row > $yn1 && $row < $yn2 ) {  
3841          if ( length( $line ) < $x ) { $line .= " " x ( $x - length( $line ) ) . "|" }  #===============================================================================
3842          else { substr( $line, $x ) = "|" }  #  Some subroutines copied from gjolists
3843    #===============================================================================
3844    #  Return the common prefix of two lists:
3845    #
3846    #  @common = common_prefix( \@list1, \@list2 )
3847    #-----------------------------------------------------------------------------
3848    sub common_prefix
3849    {
3850        my ($l1, $l2) = @_;
3851        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
3852        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
3853    
3854        my $i = 0;
3855        my $l1_i;
3856        while ( defined( $l1_i = $l1->[$i] ) && $l1_i eq $l2->[$i] ) { $i++ }
3857    
3858        return @$l1[ 0 .. ($i-1) ];  # perl handles negative range
3859      }      }
3860    
     my @dl = newick_desc_list( $node );  
3861    
3862      if ( @dl < 1 ) {  #-----------------------------------------------------------------------------
3863          $line .= " " . $node->[1];  #  Return the unique suffixes of each of two lists:
3864    #
3865    #  ( \@suffix1, \@suffix2 ) = unique_suffixes( \@list1, \@list2 )
3866    #-----------------------------------------------------------------------------
3867    sub unique_suffixes
3868    {
3869        my ($l1, $l2) = @_;
3870        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
3871        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
3872    
3873        my $i = 0;
3874        my @l1 = @$l1;
3875        my @l2 = @$l2;
3876        my $l1_i;
3877        while ( defined( $l1_i = $l1[$i] ) && $l1_i eq $l2[$i] ) { $i++ }
3878    
3879        splice @l1, 0, $i;
3880        splice @l2, 0, $i;
3881        return ( \@l1, \@l2 );
3882      }      }
3883    
     else {  
         my @list = map { [ $_, "+" ] } @dl;  #  Print symbol for line  
         $list[ 0]->[1] = "/";  
         $list[-1]->[1] = "\\";  
3884    
3885          foreach ( @list ) {  #-------------------------------------------------------------------------------
3886              my ( $n, $s ) = @$_;  #  List of values duplicated in a list (stable in order by second occurance):
3887              if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {  #
3888                  $line = text_tree_row( $n, $hash, $row, $line, $s );  #  @dups = duplicates( @list )
3889    #-------------------------------------------------------------------------------
3890    sub duplicates
3891    {
3892        my %cnt = ();
3893        grep { ++$cnt{$_} == 2 } @_;
3894    }
3895    
3896    
3897    #-------------------------------------------------------------------------------
3898    #  Randomize the order of a list:
3899    #
3900    #  @random = random_order( @list )
3901    #-------------------------------------------------------------------------------
3902    sub random_order
3903    {
3904        my ( $i, $j );
3905        for ( $i = @_ - 1; $i > 0; $i-- )
3906        {
3907            $j = int( ($i+1) * rand() );
3908            ( $_[$i], $_[$j] ) = ( $_[$j], $_[$i] ); # Interchange i and j
3909        }
3910    
3911       @_;
3912              }              }
3913    
3914    
3915    #-----------------------------------------------------------------------------
3916    #  Intersection of two or more sets:
3917    #
3918    #  @intersection = intersection( \@set1, \@set2, ... )
3919    #-----------------------------------------------------------------------------
3920    sub intersection
3921    {
3922        my $set = shift;
3923        my @intersection = @$set;
3924    
3925        foreach $set ( @_ )
3926        {
3927            my %set = map { $_ => 1 } @$set;
3928            @intersection = grep { exists $set{ $_ } } @intersection;
3929           }           }
3930    
3931          if ( $row == $y ) { substr( $line, $x, 1 ) = "+" }      @intersection;
3932      }      }
3933    
3934      return $line;  
3935    #-----------------------------------------------------------------------------
3936    #  Elements in set 1, but not set 2:
3937    #
3938    #  @difference = set_difference( \@set1, \@set2 )
3939    #-----------------------------------------------------------------------------
3940    sub set_difference
3941    {
3942        my ($set1, $set2) = @_;
3943        my %set2 = map { $_ => 1 } @$set2;
3944        grep { ! ( exists $set2{$_} ) } @$set1;
3945  }  }
3946    
3947    

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.15

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3