[Bio] / FigKernelPackages / gjonewicklib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjonewicklib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.8, Sun Feb 11 18:25:48 2007 UTC revision 1.16, Tue Sep 8 01:59:24 2009 UTC
# Line 77  Line 77 
77  #  Tree format interconversion:  #  Tree format interconversion:
78  #===============================================================================  #===============================================================================
79  #  #
80    #  $bool      = is_overbeek_tree( $tree )
81    #  $bool      = is_gjonewick_tree( $tree )
82    #
83  #  $gjonewick = overbeek_to_gjonewick( $overbeek )  #  $gjonewick = overbeek_to_gjonewick( $overbeek )
84  #  $overbeek  = gjonewick_to_overbeek( $gjonewick )  #  $overbeek  = gjonewick_to_overbeek( $gjonewick )
85  #  #
# Line 107  Line 110 
110  #  set_newick_c4( $noderef, $listref )  #  set_newick_c4( $noderef, $listref )
111  #  set_newick_c5( $noderef, $listref )  #  set_newick_c5( $noderef, $listref )
112  #  set_newick_desc_list( $noderef, @desclist )  #  set_newick_desc_list( $noderef, @desclist )
113  #  set_newick_desc_i( $noderef1, $i, $noderef2 )  #  set_newick_desc_i( $noderef1, $i, $noderef2 )  # 1-based numbering
114  #  #
115  #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid  #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid
116  #  #
# Line 118  Line 121 
121  #  #
122  #  $n       = newick_tip_count( $noderef )  #  $n       = newick_tip_count( $noderef )
123  #  @tiprefs = newick_tip_ref_list( $noderef )  #  @tiprefs = newick_tip_ref_list( $noderef )
124    # \@tiprefs = newick_tip_ref_list( $noderef )
125  #  @tips    = newick_tip_list( $noderef )  #  @tips    = newick_tip_list( $noderef )
126    # \@tips    = newick_tip_list( $noderef )
127    #
128  #  $tipref  = newick_first_tip_ref( $noderef )  #  $tipref  = newick_first_tip_ref( $noderef )
129  #  $tip     = newick_first_tip( $noderef )  #  $tip     = newick_first_tip( $noderef )
130    #
131  #  @tips    = newick_duplicated_tips( $noderef )  #  @tips    = newick_duplicated_tips( $noderef )
132    # \@tips    = newick_duplicated_tips( $noderef )
133    #
134  #  $bool    = newick_tip_in_tree( $noderef, $tipname )  #  $bool    = newick_tip_in_tree( $noderef, $tipname )
135    #
136  #  @tips    = newick_shared_tips( $tree1, $tree2 )  #  @tips    = newick_shared_tips( $tree1, $tree2 )
137    # \@tips    = newick_shared_tips( $tree1, $tree2 )
138  #  #
139  #  $length  = newick_tree_length( $noderef )  #  $length  = newick_tree_length( $noderef )
140    #
141    #  %tip_distances = newick_tip_distances( $noderef )
142    # \%tip_distances = newick_tip_distances( $noderef )
143    #
144  #  $xmax    = newick_max_X( $noderef )  #  $xmax    = newick_max_X( $noderef )
145  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )
146  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )  #  ( $tipname, $xmax ) = newick_most_distant_tip_name( $noderef )
147  #  #
148  #  Tree tip insertion point (tip is on branch of length x that  #  Tree tip insertion point (tip is on branch of length x that
149  #  is inserted into branch connecting node1 and node2, a distance  #  is inserted into branch connecting node1 and node2, a distance
150  #  x1 from node1 and x2 from node2):  #  x1 from node1 and x2 from node2):
151  #  #
152  #  [ $node1, $x1, $node2, $x2, $x ]  #  [ $node1, $x1, $node2, $x2, $x ] = newick_tip_insertion_point( $tree, $tip )
 #           = newick_tip_insertion_point( $tree, $tip )  
153  #  #
154  #  Standardized label for a node in terms of intersection of 3 lowest sorting  #  Standardized label for a node in terms of intersection of 3 lowest sorting
155  #  tips (sort is lower case):  #  tips (sort is lower case):
156  #  #
157  #  @TipOrTips = std_node_name( $Tree, $Node )  #  @TipOrTips = std_node_name( $tree, $node )
158  #  #
159  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
160  #  Paths from root of tree:  #  Paths from root of tree:
# Line 189  Line 203 
203  #  $n_changed = newick_set_all_branches( $node, $x )  #  $n_changed = newick_set_all_branches( $node, $x )
204  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
205  #  $node      = newick_rescale_branches( $node, $factor )  #  $node      = newick_rescale_branches( $node, $factor )
206    #  $node      = newick_modify_branches( $node, \&function )
207    #  $node      = newick_modify_branches( $node, \&function, \@func_parms )
208  #  #
209  #  Modify comments:  #  Modify comments:
210  #  #
# Line 206  Line 222 
222  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )
223  #  $newtree = reroot_newick_to_node( $tree, @node )  #  $newtree = reroot_newick_to_node( $tree, @node )
224  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
225  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
226    #  $newtree = reroot_newick_to_midpoint( $tree )           # unweighted
227    #  $newtree = reroot_newick_to_midpoint_w( $tree )         # weight by tips
228  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted
229  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips
230  #  $newtree = uproot_tip_rooted_newick( $tree )  #  $newtree = uproot_tip_rooted_newick( $tree )
231  #  $newtree = uproot_newick( $tree )  #  $newtree = uproot_newick( $tree )
232  #  #
233  #  $newtree = prune_from_newick( $tree, $tip )  #  $newtree = prune_from_newick( $tree, $tip )
234    #  $newtree = rooted_newick_subtree( $tree,  @tips )
235    #  $newtree = rooted_newick_subtree( $tree, \@tips )
236  #  $newtree = newick_subtree( $tree,  @tips )  #  $newtree = newick_subtree( $tree,  @tips )
237  #  $newtree = newick_subtree( $tree, \@tips )  #  $newtree = newick_subtree( $tree, \@tips )
238    #  $newtree = newick_covering_subtree( $tree,  @tips )
239    #  $newtree = newick_covering_subtree( $tree, \@tips )
240  #  #
241  #  $newtree = collapse_zero_length_branches( $tree )  #  $newtree = collapse_zero_length_branches( $tree )
242  #  #
# Line 222  Line 244 
244  #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )  #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
245  #  #
246  #===============================================================================  #===============================================================================
247    #  Tree neighborhood: subtree of n tips to represent a larger tree.
248    #===============================================================================
249    #
250    #  Focus around root:
251    #
252    #  $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
253    #  $subtree = root_neighborhood_representative_tree( $tree, $n )
254    #  @tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
255    #  @tips    = root_neighborhood_representative_tips( $tree, $n )
256    # \@tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
257    # \@tips    = root_neighborhood_representative_tips( $tree, $n )
258    #
259    #  Focus around a tip insertion point (the tip is not in the subtree):
260    #
261    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
262    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
263    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
264    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
265    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
266    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
267    #
268    #===============================================================================
269  #  Tree reading and writing:  #  Tree reading and writing:
270  #===============================================================================  #===============================================================================
271    #  Write machine-readable trees:
272  #  #
273  #   writeNewickTree( $tree )  #   writeNewickTree( $tree )
274  #   writeNewickTree( $tree, $file )  #   writeNewickTree( $tree, $file )
# Line 231  Line 276 
276  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O
277  #  $treestring = swriteNewickTree( $tree )  #  $treestring = swriteNewickTree( $tree )
278  #  $treestring = formatNewickTree( $tree )  #  $treestring = formatNewickTree( $tree )
279    #
280    #  Write human-readable trees:
281    #
282  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )
283  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )
284  #  #
285    #  Read trees:
286    #
287  #  $tree  = read_newick_tree( $file )  # reads to a semicolon  #  $tree  = read_newick_tree( $file )  # reads to a semicolon
288  #  @trees = read_newick_trees( $file ) # reads to end of file  #  @trees = read_newick_trees( $file ) # reads to end of file
289  #  $tree  = parse_newick_tree_str( $string )  #  $tree  = parse_newick_tree_str( $string )
# Line 243  Line 293 
293    
294  use Carp;  use Carp;
295  use Data::Dumper;  use Data::Dumper;
296    use strict;
297    
298  require Exporter;  require Exporter;
299    
300  our @ISA = qw(Exporter);  our @ISA = qw(Exporter);
301  our @EXPORT = qw(  our @EXPORT = qw(
302            is_overbeek_tree
303            is_gjonewick_tree
304          overbeek_to_gjonewick          overbeek_to_gjonewick
305          gjonewick_to_overbeek          gjonewick_to_overbeek
   
306          newick_is_valid          newick_is_valid
307          newick_is_rooted          newick_is_rooted
308          newick_is_unrooted          newick_is_unrooted
309          tree_rooted_on_tip          tree_rooted_on_tip
310          newick_is_bifurcating          newick_is_bifurcating
311          newick_tip_count          newick_tip_count
312            newick_tip_ref_list
313          newick_tip_list          newick_tip_list
314    
315          newick_first_tip          newick_first_tip
316          newick_duplicated_tips          newick_duplicated_tips
317          newick_tip_in_tree          newick_tip_in_tree
318          newick_shared_tips          newick_shared_tips
319    
320          newick_tree_length          newick_tree_length
321            newick_tip_distances
322          newick_max_X          newick_max_X
323          newick_most_distant_tip_ref          newick_most_distant_tip_ref
324          newick_most_distant_tip_name          newick_most_distant_tip_name
325    
326            newick_tip_insertion_point
327    
328          std_newick_name          std_newick_name
329    
330          path_to_tip          path_to_tip
# Line 290  Line 347 
347          newick_set_all_branches          newick_set_all_branches
348          newick_fix_negative_branches          newick_fix_negative_branches
349          newick_rescale_branches          newick_rescale_branches
350            newick_modify_branches
351    
352          newick_strip_comments          newick_strip_comments
353    
# Line 305  Line 363 
363          reroot_newick_next_to_tip          reroot_newick_next_to_tip
364          reroot_newick_to_node          reroot_newick_to_node
365          reroot_newick_to_node_ref          reroot_newick_to_node_ref
366            reroot_newick_between_nodes
367            reroot_newick_to_midpoint
368            reroot_newick_to_midpoint_w
369          reroot_newick_to_approx_midpoint          reroot_newick_to_approx_midpoint
370          reroot_newick_to_approx_midpoint_w          reroot_newick_to_approx_midpoint_w
371          uproot_tip_rooted_newick          uproot_tip_rooted_newick
372          uproot_newick          uproot_newick
373    
374          prune_from_newick          prune_from_newick
375            rooted_newick_subtree
376          newick_subtree          newick_subtree
377            newick_covering_subtree
378          collapse_zero_length_branches          collapse_zero_length_branches
379    
380          newick_insert_at_node          newick_insert_at_node
381          newick_insert_between_nodes          newick_insert_between_nodes
382    
383            root_neighborhood_representative_tree
384            root_neighborhood_representative_tips
385            tip_neighborhood_representative_tree
386            tip_neighborhood_representative_tips
387    
388          writeNewickTree          writeNewickTree
389          fwriteNewickTree          fwriteNewickTree
390          strNewickTree          strNewickTree
# Line 362  Line 430 
430          );          );
431    
432    
 use gjolists qw(  
         common_prefix  
         unique_suffixes  
   
         duplicates  
         random_order  
   
         intersection  
         set_difference  
         );  
   
   
 use strict;  
   
   
433  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
434  #  Internally used definitions  #  Internally used definitions
435  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 386  Line 439 
439    
440    
441  #===============================================================================  #===============================================================================
442  #  Interconvert Overbeek and gjonewick trees:  #  Interconvert overbeek and gjonewick trees:
443  #===============================================================================  #===============================================================================
444    
445    sub is_overbeek_tree { array_ref( $_[0] ) && array_ref( $_[0]->[2] ) }
446    
447    sub is_gjonewick_tree { array_ref( $_[0] ) && array_ref( $_[0]->[0] ) }
448    
449  sub overbeek_to_gjonewick  sub overbeek_to_gjonewick
450  {  {
451      return () unless ref( $_[0] ) eq 'ARRAY';      return () unless ref( $_[0] ) eq 'ARRAY';
# Line 408  Line 465 
465      return $node;      return $node;
466  }  }
467    
468    
469  #===============================================================================  #===============================================================================
470  #  Extract tree structure values:  #  Extract tree structure values:
471  #===============================================================================  #===============================================================================
# Line 428  Line 486 
486  #  #
487  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
488    
489  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]  sub newick_desc_ref { ref($_[0]) ? $_[0]->[0] : Carp::confess() }  # = ${$_[0]}[0]
490  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }
491  sub newick_x        { $_[0]->[2] }  sub newick_x        { ref($_[0]) ? $_[0]->[2] : Carp::confess() }
492  sub newick_c1       { $_[0]->[3] }  sub newick_c1       { ref($_[0]) ? $_[0]->[3] : Carp::confess() }
493  sub newick_c2       { $_[0]->[4] }  sub newick_c2       { ref($_[0]) ? $_[0]->[4] : Carp::confess() }
494  sub newick_c3       { $_[0]->[5] }  sub newick_c3       { ref($_[0]) ? $_[0]->[5] : Carp::confess() }
495  sub newick_c4       { $_[0]->[6] }  sub newick_c4       { ref($_[0]) ? $_[0]->[6] : Carp::confess() }
496  sub newick_c5       { $_[0]->[7] }  sub newick_c5       { ref($_[0]) ? $_[0]->[7] : Carp::confess() }
497    
498  sub newick_desc_list {  sub newick_desc_list {
499      my $node = $_[0];      my $node = $_[0];
# Line 641  Line 699 
699  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
700  #  List of tip nodes:  #  List of tip nodes:
701  #  #
702  #  @tips = newick_tip_ref_list( $node )  #  @tips = newick_tip_ref_list( $noderef )
703    # \@tips = newick_tip_ref_list( $noderef )
704  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
705  sub newick_tip_ref_list {  sub newick_tip_ref_list {
706      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
# Line 658  Line 717 
717          push @list, newick_tip_ref_list( $_, 1 );          push @list, newick_tip_ref_list( $_, 1 );
718      }      }
719    
720      @list;      wantarray ? @list : \@list;
721  }  }
722    
723    
# Line 666  Line 725 
725  #  List of tips:  #  List of tips:
726  #  #
727  #  @tips = newick_tip_list( $node )  #  @tips = newick_tip_list( $node )
728    # \@tips = newick_tip_list( $node )
729  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
730  sub newick_tip_list {  sub newick_tip_list {
731      map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );      my @tips = map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );
732        wantarray ? @tips : \@tips;
733  }  }
734    
735    
# Line 707  Line 768 
768  #  List of duplicated tip labels.  #  List of duplicated tip labels.
769  #  #
770  #  @tips = newick_duplicated_tips( $node )  #  @tips = newick_duplicated_tips( $node )
771    # \@tips = newick_duplicated_tips( $node )
772  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
773  sub newick_duplicated_tips {  sub newick_duplicated_tips {
774      gjolists::duplicates( newick_tip_list( $_[0] ) );      my @tips = &duplicates( newick_tip_list( $_[0] ) );
775        wantarray ? @tips : \@tips;
776  }  }
777    
778    
# Line 740  Line 803 
803  #  Tips shared between 2 trees.  #  Tips shared between 2 trees.
804  #  #
805  #  @tips = newick_shared_tips( $tree1, $tree2 )  #  @tips = newick_shared_tips( $tree1, $tree2 )
806    # \@tips = newick_shared_tips( $tree1, $tree2 )
807  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
808  sub newick_shared_tips {  sub newick_shared_tips {
809      my ( $Tree1, $Tree2 ) = @_;      my ( $tree1, $tree2 ) = @_;
810      my ( @Tips1 ) = newick_tip_list( $Tree1 );      my $tips1 = newick_tip_list( $tree1 );
811      my ( @Tips2 ) = newick_tip_list( $Tree2 );      my $tips2 = newick_tip_list( $tree2 );
812      gjolists::intersection( \@Tips1, \@Tips2 );      my @tips = &intersection( $tips1, $tips2 );
813        wantarray ? @tips : \@tips;
814  }  }
815    
816    
# Line 767  Line 832 
832    
833    
834  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
835    #  Hash of tip nodes and corresponding distances from root:
836    #
837    #   %tip_distances = newick_tip_distances( $node )
838    #  \%tip_distances = newick_tip_distances( $node )
839    #-------------------------------------------------------------------------------
840    sub newick_tip_distances
841    {
842        my ( $node, $x, $hash ) = @_;
843        my $root = ! $hash;
844        ref( $hash ) eq 'HASH' or $hash = {};
845    
846        $x ||= 0;
847        $x  += newick_x( $node ) || 0;
848    
849        #  Is it a tip?
850    
851        my $n_desc = newick_n_desc( $node );
852        if ( ! $n_desc )
853        {
854            $hash->{ newick_lbl( $node ) } = $x;
855            return $hash;
856        }
857    
858        #  Tree rooted on tip?
859    
860        if ( ( $n_desc == 1 ) && $root && ( newick_lbl( $node ) ) )
861        {
862            $hash->{ newick_lbl( $node ) } = 0;  # Distance to root is zero
863        }
864    
865        foreach ( newick_desc_list( $node ) )
866        {
867            newick_tip_distances( $_, $x, $hash );
868        }
869    
870        wantarray ? %$hash : $hash;
871    }
872    
873    
874    #-------------------------------------------------------------------------------
875  #  Tree max X.  #  Tree max X.
876  #  #
877  #  $xmax = newick_max_X( $node )  #  $xmax = newick_max_X( $node )
# Line 910  Line 1015 
1015    
1016      else      else
1017      {      {
1018          my ( $n1, $x1 ) = describe_desc( $dl->[0] );          my ( $n1, $x1 ) = describe_descendant( $dl->[0] );
1019          my ( $n2, $x2 ) = describe_desc( $dl->[1] );          my ( $n2, $x2 ) = describe_descendant( $dl->[1] );
1020    
1021          if ( @$n1 == 2 ) { push @$n1, $n2->[0] }          if ( @$n1 == 2 ) { push @$n1, $n2->[0] }
1022          if ( @$n2 == 2 )          if ( @$n2 == 2 )
# Line 926  Line 1031 
1031  }  }
1032    
1033    
1034  sub describe_desc  sub describe_descendant
1035  {  {
1036      my $node = shift;      my $node = shift;
1037    
# Line 951  Line 1056 
1056      #  Return the two lowest of those (the third will come from the      #  Return the two lowest of those (the third will come from the
1057      #  other side of the original node).      #  other side of the original node).
1058    
     else  
     {  
1059          my @rep_tips = sort { lc $a cmp lc $b }          my @rep_tips = sort { lc $a cmp lc $b }
1060                         map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }                         map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }
1061                         @$dl;                         @$dl;
1062          return ( [ @rep_tips[0,1] ], $x );          return ( [ @rep_tips[0,1] ], $x );
1063      }      }
 }  
1064    
1065    
1066  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 967  Line 1069 
1069  #     Three sorted tip labels intersecting at node, each being smallest  #     Three sorted tip labels intersecting at node, each being smallest
1070  #           of all the tips of their subtrees  #           of all the tips of their subtrees
1071  #  #
1072  #  @TipOrTips = std_node_name( $Tree, $Node )  #  @TipOrTips = std_node_name( $tree, $node )
1073  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1074  sub std_node_name {  sub std_node_name {
1075      my $tree = $_[0];      my $tree = $_[0];
# Line 985  Line 1087 
1087      #  @rest, and keeping the best tip for each subtree.      #  @rest, and keeping the best tip for each subtree.
1088    
1089      my @rest = newick_tip_list( $tree );      my @rest = newick_tip_list( $tree );
1090      my @best = map {      my @best = map
1091              {
1092              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );
1093              @rest = gjolists::set_difference( \@rest, \@tips );              @rest = &set_difference( \@rest, \@tips );
1094              $tips[0];              $tips[0];
1095          } newick_desc_list( $noderef );          } newick_desc_list( $noderef );
1096    
# Line 1075  Line 1178 
1178      my $imax = newick_n_desc( $node );      my $imax = newick_n_desc( $node );
1179      for ( my $i = 1; $i <= $imax; $i++ ) {      for ( my $i = 1; $i <= $imax; $i++ ) {
1180         @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );         @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );
1181         if ( @path ) { return @path }          return @path if @path;
1182      }      }
1183    
1184      ();  #  Not found      ();  #  Not found
# Line 1106  Line 1209 
1209      @p2 && @p3 || return ();                             #  Were they found?      @p2 && @p3 || return ();                             #  Were they found?
1210    
1211      # Find the common prefix for each pair of paths      # Find the common prefix for each pair of paths
1212      my @p12 = gjolists::common_prefix( \@p1, \@p2 );      my @p12 = &common_prefix( \@p1, \@p2 );
1213      my @p13 = gjolists::common_prefix( \@p1, \@p3 );      my @p13 = &common_prefix( \@p1, \@p3 );
1214      my @p23 = gjolists::common_prefix( \@p2, \@p3 );      my @p23 = &common_prefix( \@p2, \@p3 );
1215    
1216      # Return the longest common prefix of any two paths      # Return the longest common prefix of any two paths
1217      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :
# Line 1159  Line 1262 
1262      @p1 && @p2 || return undef;                          # Were they found?      @p1 && @p2 || return undef;                          # Were they found?
1263    
1264      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1265      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1266      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1267      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1268    
# Line 1184  Line 1287 
1287      my @p2 = path_to_node( $node, $node2 ) or return undef;      my @p2 = path_to_node( $node, $node2 ) or return undef;
1288    
1289      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1290      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1291      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1292      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1293    
# Line 1435  Line 1538 
1538    
1539    
1540  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1541    #  Modify all branch lengths by a function.
1542    #
1543    #     $node = newick_modify_branches( $node, \&function )
1544    #     $node = newick_modify_branches( $node, \&function, \@func_parms )
1545    #
1546    #  Function must have form
1547    #
1548    #     $x2 = &$function( $x1 )
1549    #     $x2 = &$function( $x1, @$func_parms )
1550    #
1551    #-------------------------------------------------------------------------------
1552    sub newick_modify_branches {
1553        my ( $node, $func, $parm ) = @_;
1554    
1555        set_newick_x( $node, &$func( newick_x( $node ), ( $parm ? @$parm : () ) ) );
1556        foreach ( newick_desc_list( $node ) )
1557        {
1558            newick_modify_branches( $_, $func, $parm )
1559        }
1560    
1561        $node;
1562    }
1563    
1564    
1565    #-------------------------------------------------------------------------------
1566  #  Set negative branches to zero.  The original tree is modfied.  #  Set negative branches to zero.  The original tree is modfied.
1567  #  #
1568  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
# Line 1582  Line 1710 
1710      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
1711      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip
1712    
     #  Reorder this subtree:  
   
1713      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1714      if    ( $dir < 0 ) {                   #  Big group first  
1715          @$dl_ref = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;      #  Reorder this subtree (biggest subtrees to outside)
1716    
1717        if ( $dir )
1718        {
1719            #  Big group first
1720            my @dl = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;
1721    
1722            my ( @dl1, @dl2 );
1723            for ( my $i = 0; $i < $nd; $i++ ) {
1724                if ( $i & 1 ) { push @dl2, $dl[$i] } else { push @dl1, $dl[$i] }
1725      }      }
1726      elsif ( $dir > 0 ) {                   #  Small group first  
1727          @$dl_ref = sort { $cntref->{$a} <=> $cntref->{$b} } @$dl_ref;          @$dl_ref = ( $dir < 0 ) ? ( @dl1, reverse @dl2 )
1728                                    : ( @dl2, reverse @dl1 );
1729      }      }
1730    
1731      #  Reorder within descendant subtrees:      #  Reorder within descendant subtrees:
# Line 1621  Line 1757 
1757  #  #
1758  #  $tree = unaesthetic_newick_tree( $treeref, $dir )  #  $tree = unaesthetic_newick_tree( $treeref, $dir )
1759  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1760  sub unaesthetic_newick_tree {  sub unaesthetic_newick_tree
1761    {
1762      my ( $tree, $dir ) = @_;      my ( $tree, $dir ) = @_;
1763      my %cnt;      my %cnt;
1764    
# Line 1641  Line 1778 
1778  #           = 0 for no change, and  #           = 0 for no change, and
1779  #           > 0 for downward branch (small group first).  #           > 0 for downward branch (small group first).
1780  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1781  sub reorder_against_tip_count {  sub reorder_against_tip_count
1782    {
1783      my ( $node, $cntref, $dir ) = @_;      my ( $node, $cntref, $dir ) = @_;
1784    
1785      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1680  Line 1818 
1818  #  #
1819  #  $tree = random_order_newick_tree( $tree )  #  $tree = random_order_newick_tree( $tree )
1820  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1821  sub random_order_newick_tree {  sub random_order_newick_tree
1822    {
1823      my ( $node ) = @_;      my ( $node ) = @_;
1824    
1825      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1689  Line 1828 
1828      #  Reorder this subtree:      #  Reorder this subtree:
1829    
1830      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1831      @$dl_ref = gjolists::random_order( @$dl_ref );      @$dl_ref = &random_order( @$dl_ref );
1832    
1833      #  Reorder descendants:      #  Reorder descendants:
1834    
# Line 1704  Line 1843 
1843  #  #
1844  #  $newtree = reroot_newick_by_path( @path )  #  $newtree = reroot_newick_by_path( @path )
1845  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1846  sub reroot_newick_by_path {  sub reroot_newick_by_path
1847    {
1848      my ( $node1, $path1, @rest ) = @_;      my ( $node1, $path1, @rest ) = @_;
1849      array_ref( $node1 ) || return undef;      #  Always expect a node      array_ref( $node1 ) || return undef;      #  Always expect a node
1850    
# Line 1812  Line 1952 
1952    
1953    
1954  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1955    #  Reroot a newick tree along the path between 2 nodes:
1956    #
1957    #  $tree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
1958    #-------------------------------------------------------------------------------
1959    sub reroot_newick_between_nodes
1960    {
1961        my ( $tree, $node1, $node2, $fraction ) = @_;
1962        array_ref( $tree ) or return undef;
1963        $fraction >= 0 && $fraction <= 1 or return undef;
1964    
1965        #  Find the paths to the nodes:
1966    
1967        my @path1 = path_to_node( $tree, $node1 ) or return undef;
1968        my @path2 = path_to_node( $tree, $node2 ) or return undef;
1969    
1970        reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction )
1971    }
1972    
1973    
1974    #-------------------------------------------------------------------------------
1975    #  Reroot a newick tree along the path between 2 nodes:
1976    #
1977    #  $tree = reroot_newick_between_node_refs( $tree, $node1, $node2, $fraction )
1978    #-------------------------------------------------------------------------------
1979    sub reroot_newick_between_node_refs
1980    {
1981        my ( $tree, $node1, $node2, $fraction ) = @_;
1982        array_ref( $tree ) or return undef;
1983    
1984        #  Find the paths to the nodes:
1985    
1986        my @path1 = path_to_node_ref( $tree, $node1 ) or return undef;
1987        my @path2 = path_to_node_ref( $tree, $node2 ) or return undef;;
1988    
1989        reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction )
1990    }
1991    
1992    
1993    #-------------------------------------------------------------------------------
1994    #  Reroot a newick tree along the path between 2 nodes defined by paths:
1995    #
1996    #  $tree = reroot_newick_between_nodes_by_path( $tree, $path1, $path2, $fraction )
1997    #-------------------------------------------------------------------------------
1998    sub reroot_newick_between_nodes_by_path
1999    {
2000        my ( $tree, $path1, $path2, $fraction ) = @_;
2001        array_ref( $tree ) and array_ref( $path1 ) and  array_ref( $path2 )
2002           or return undef;
2003        $fraction >= 0 && $fraction <= 1 or return undef;
2004    
2005        my @path1 = @$path1;
2006        my @path2 = @$path2;
2007    
2008        #  Trim the common prefix, saving it:
2009    
2010        my @prefix = ();
2011        while ( defined( $path1[1] ) && defined( $path2[1] ) && ( $path1[1] == $path2[1] ) )
2012        {
2013            push @prefix, splice( @path1, 0, 2 );
2014            splice( @path2, 0, 2 );
2015        }
2016    
2017        my ( @path, $dist );
2018        if    ( @path1 < 3 )
2019        {
2020            @path2 >= 3 or return undef;              # node1 = node2
2021            $dist = $fraction * newick_path_length( @path2 );
2022            @path = @path2;
2023        }
2024        elsif ( @path2 < 3 )
2025        {
2026            $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
2027            @path = @path1;
2028        }
2029        else
2030        {
2031            my $dist1 = newick_path_length( @path1 );
2032            my $dist2 = newick_path_length( @path2 );
2033            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2034            @path = ( $dist <= 0 ) ? @path1 : @path2;
2035            $dist = abs( $dist );
2036        }
2037    
2038        #  Descend tree until we reach the insertion branch:
2039    
2040        my $x;
2041        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
2042        {
2043            $dist -= $x;
2044            push @prefix, splice( @path, 0, 2 );
2045        }
2046    
2047        #  Insert the new node:
2048    
2049        my $newnode = [ [ $path[2] ], undef, $dist ];
2050        set_newick_desc_i( $path[0], $path[1], $newnode );
2051        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
2052    
2053        #  We can now build the path from root to the new node
2054    
2055        reroot_newick_by_path( @prefix, @path[0,1], $newnode );
2056    }
2057    
2058    
2059    #-------------------------------------------------------------------------------
2060  #  Move root of tree to an approximate midpoint.  #  Move root of tree to an approximate midpoint.
2061  #  #
2062  #  $newtree = reroot_newick_to_approx_midpoint( $tree )  #  $newtree = reroot_newick_to_approx_midpoint( $tree )
# Line 1823  Line 2068 
2068    
2069      my $dists1 = average_to_tips_1( $tree );      my $dists1 = average_to_tips_1( $tree );
2070    
2071      #  Compile average tip to node distances descending, returning midpoint node      #  Compile average tip to node distances descending, returning midpoint
2072        #  cadidates as a list of [ $node1, $node2, $fraction ]
2073    
2074        my @mids = average_to_tips_2( $dists1, undef, undef );
2075    
2076        #  Reroot to first midpoint candidate
2077    
2078        return $tree if ! @mids;
2079        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2080        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2081    }
2082    
2083    
2084    #-------------------------------------------------------------------------------
2085    #  Move root of tree to a midpoint.
2086    #
2087    #  $newtree = reroot_newick_to_midpoint( $tree )
2088    #-------------------------------------------------------------------------------
2089    sub reroot_newick_to_midpoint {
2090        my ( $tree ) = @_;
2091    
2092        #  Compile average tip to node distances assending
2093    
2094        my $dists1 = average_to_tips_1( $tree );
2095    
2096      my $node = average_to_tips_2( $dists1, undef, undef );      #  Compile average tip to node distances descending, returning midpoint
2097        #  [ $node1, $node2, $fraction ]
2098    
2099      #  Reroot      my @mids = average_to_tips_2( $dists1, undef, undef );
2100    
2101      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree      @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2102  }  }
2103    
2104    
2105    #-------------------------------------------------------------------------------
2106    #  Compile average tip to node distances assending
2107    #-------------------------------------------------------------------------------
2108  sub average_to_tips_1 {  sub average_to_tips_1 {
2109      my ( $node ) = @_;      my ( $node ) = @_;
2110    
# Line 1843  Line 2115 
2115          foreach ( @desc_dists ) { $x_below += $_->[0] }          foreach ( @desc_dists ) { $x_below += $_->[0] }
2116          $x_below /= @desc_dists;          $x_below /= @desc_dists;
2117      }      }
2118    
2119      my $x = newick_x( $node ) || 0;      my $x = newick_x( $node ) || 0;
2120      my $x_net = $x_below + $x;      my $x_net = $x_below + $x;
2121    
# Line 1850  Line 2123 
2123  }  }
2124    
2125    
2126    #-------------------------------------------------------------------------------
2127    #  Compile average tip to node distances descending, returning midpoint as
2128    #  [ $node1, $node2, $fraction_of_dist_between ]
2129    #-------------------------------------------------------------------------------
2130  sub average_to_tips_2 {  sub average_to_tips_2 {
2131      my ( $dists1, $x_above, $anc_node ) = @_;      my ( $dists1, $x_above, $anc_node ) = @_;
2132      my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;      my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;
2133    
2134      #  Are we done?  Root is in this node's branch, or "above"?      #  Are we done?  Root is in this node's branch, or "above"?
2135    
2136      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      my @mids = ();
     # print STDERR "x       = $x\n";  
     # print STDERR "x_below = $x_below\n";  
     # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";  
   
2137      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2138      {      {
2139          #  At this point the root can only be in this node's branch,          #  At this point the root can only be in this node's branch,
# Line 1872  Line 2145 
2145    
2146          if ( ( $x_below + $x ) >= $x_above )          if ( ( $x_below + $x ) >= $x_above )
2147          {          {
2148              return ( $x_above >= $x_below ) ? $anc_node : $node;              #  We will need to make a new node for the root, $fract of
2149          }              #  the way from $node to $anc_node:
2150          else              my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2151          {                                     : 0.5;
2152              return undef;              push @mids, [ $node, $anc_node, $fract ];
2153          }          }
2154      }      }
2155    
2156      #  The root must be somewhere below this node:      #  The root might be somewhere below this node:
2157    
2158      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );
2159      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );
# Line 1890  Line 2163 
2163          #  If input tree is tip_rooted, $n-1 can be 0, so:          #  If input tree is tip_rooted, $n-1 can be 0, so:
2164    
2165          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;
2166          my $root = average_to_tips_2( $_, $above2, $node );          push @mids, average_to_tips_2( $_, $above2, $node );
         if ( $root ) { return $root }  
2167      }      }
2168    
2169      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2170  }  }
2171    
2172    
# Line 1908  Line 2178 
2178  sub reroot_newick_to_approx_midpoint_w {  sub reroot_newick_to_approx_midpoint_w {
2179      my ( $tree ) = @_;      my ( $tree ) = @_;
2180    
2181        #  Compile average tip to node distances assending from tips
2182    
2183        my $dists1 = average_to_tips_1_w( $tree );
2184    
2185        #  Compile average tip to node distances descending, returning midpoints
2186    
2187        my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2188    
2189        #  Reroot to first midpoint candidate
2190    
2191        return $tree if ! @mids;
2192        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2193        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2194    }
2195    
2196    
2197    #-------------------------------------------------------------------------------
2198    #  Move root of tree to an approximate midpoint.  Weight by tips.
2199    #
2200    #  $newtree = reroot_newick_to_midpoint_w( $tree )
2201    #-------------------------------------------------------------------------------
2202    sub reroot_newick_to_midpoint_w {
2203        my ( $tree ) = @_;
2204    
2205      #  Compile average tip to node distances assending      #  Compile average tip to node distances assending
2206    
2207      my $dists1 = average_to_tips_1_w( $tree );      my $dists1 = average_to_tips_1_w( $tree );
2208    
2209      #  Compile average tip to node distances descending, returning midpoint node      #  Compile average tip to node distances descending, returning midpoint node
2210    
2211      my $node = average_to_tips_2_w( $dists1, undef, undef, undef );      my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2212    
2213      #  Reroot      #  Reroot at first candidate midpoint
2214    
2215      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree      @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2216  }  }
2217    
2218    
# Line 1939  Line 2233 
2233          }          }
2234          $x_below /= $n_below;          $x_below /= $n_below;
2235      }      }
2236    
2237      my $x = newick_x( $node ) || 0;      my $x = newick_x( $node ) || 0;
2238      my $x_net = $x_below + $x;      my $x_net = $x_below + $x;
2239    
# Line 1952  Line 2247 
2247    
2248      #  Are we done?  Root is in this node's branch, or "above"?      #  Are we done?  Root is in this node's branch, or "above"?
2249    
2250      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      my @mids = ();
     # print STDERR "x       = $x\n";  
     # print STDERR "x_below = $x_below\n";  
     # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";  
   
2251      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2252      {      {
2253          #  At this point the root can only be in this node's branch,          #  At this point the root can only be in this node's branch,
# Line 1964  Line 2255 
2255          #  would mean that the midpoint is actually down a different          #  would mean that the midpoint is actually down a different
2256          #  path from the root of the current tree).          #  path from the root of the current tree).
2257          #          #
2258          #  Is the root in the current branch?          #  Is their a root in the current branch?
2259    
2260          if ( ( $x_below + $x ) >= $x_above )          if ( ( $x_below + $x ) >= $x_above )
2261          {          {
2262              return ( $x_above >= $x_below ) ? $anc_node : $node;              #  We will need to make a new node for the root, $fract of
2263          }              #  the way from $node to $anc_node:
2264          else              my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2265          {                                     : 0.5;
2266              return undef;              push @mids, [ $node, $anc_node, $fract ];
2267          }          }
2268      }      }
2269    
# Line 1992  Line 2283 
2283    
2284          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )
2285                                   : 0;                                   : 0;
2286          my $root = average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );          push @mids, average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );
         if ( $root ) { return $root }  
2287      }      }
2288    
2289      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2290  }  }
2291    
2292    
# Line 2313  Line 2601 
2601    
2602    
2603  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2604    #  Produce a potentially rooted subtree with the desired tips:
2605    #
2606    #     Except for (some) tip nodes, the tree produced is a copy.
2607    #     There is no check that requested tips exist.
2608    #
2609    #  $newtree = rooted_newick_subtree( $tree,  @tips )
2610    #  $newtree = rooted_newick_subtree( $tree, \@tips )
2611    #-------------------------------------------------------------------------------
2612    sub rooted_newick_subtree {
2613        my ( $tr, @tips ) = @_;
2614        if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }
2615    
2616        if ( @tips < 2 ) { return undef }
2617        my $keephash = { map { ( $_, 1 ) } @tips };
2618        my $tr2 = subtree1( $tr, $keephash );
2619        $tr2->[2] = undef if $tr2;                   # undef root branch length
2620        $tr2;
2621    }
2622    
2623    
2624    #-------------------------------------------------------------------------------
2625  #  Produce a subtree with the desired tips:  #  Produce a subtree with the desired tips:
2626  #  #
2627  #     Except for (some) tip nodes, the tree produced is a copy.  #     Except for (some) tip nodes, the tree produced is a copy.
# Line 2386  Line 2695 
2695  }  }
2696    
2697    
2698    #-------------------------------------------------------------------------------
2699    #  The smallest subtree of rooted tree that includes @tips:
2700    #
2701    #    $node = newick_covering_subtree( $tree,  @tips )
2702    #    $node = newick_covering_subtree( $tree, \@tips )
2703    #-------------------------------------------------------------------------------
2704    
2705    sub newick_covering_subtree {
2706        my $tree = shift;
2707        my %tips = map { $_ => 1 } ( ( ref( $_[0] ) eq 'ARRAY' ) ? @{ $_[0] } : @_ );
2708    
2709        #  Return smallest covering node, if any:
2710    
2711        ( newick_covering_subtree( $tree, \%tips ) )[ 0 ];
2712    }
2713    
2714    
2715    sub newick_covering_subtree_1 {
2716        my ( $node, $tips ) = @_;
2717        my $n_cover = 0;
2718        my @desc = newick_desc_list( $node );
2719        if ( @desc )
2720        {
2721            foreach ( @desc )
2722            {
2723                my ( $subtree, $n ) = newick_covering_subtree_1( $_, $tips );
2724                return ( $subtree, $n ) if $subtree;
2725                $n_cover += $n;
2726            }
2727        }
2728        elsif ( $tips->{ newick_lbl( $node ) } )
2729        {
2730            $n_cover++;
2731        }
2732    
2733        #  If all tips are covered, return node
2734    
2735        ( $n_cover == keys %$tips ) ? ( $node, $n_cover ) : ( undef, $n_cover );
2736    }
2737    
2738    
2739    #===============================================================================
2740    #
2741    #  Representative subtrees
2742    #
2743    #===============================================================================
2744    #  Find subtree of size n representating vicinity of the root:
2745    #
2746    #   $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
2747    #   $subtree = root_neighborhood_representative_tree( $tree, $n )
2748    #
2749    #  Note that if $tree is rooted, then the subtree will also be.  This can have
2750    #  consequences on downstream programs.
2751    #-------------------------------------------------------------------------------
2752    sub root_neighborhood_representative_tree
2753    {
2754        my ( $tree, $n, $tip_priority ) = @_;
2755        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2756        if ( newick_tip_count( $tree ) <= $n ) { return $tree }
2757    
2758        $tip_priority ||= default_tip_priority( $tree );
2759        my @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2760                   root_proximal_newick_subtrees( $tree, $n );
2761    
2762        newick_subtree( copy_newick_tree( $tree ), \@tips );
2763    }
2764    
2765    
2766    #-------------------------------------------------------------------------------
2767    #  Find n tips to represent tree lineages in vicinity of another tip.
2768    #  Default tip priority is short total branch length.
2769    #
2770    #  \@tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2771    #   @tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2772    #  \@tips = root_neighborhood_representative_tips( $tree, $n )
2773    #   @tips = root_neighborhood_representative_tips( $tree, $n )
2774    #-------------------------------------------------------------------------------
2775    sub root_neighborhood_representative_tips
2776    {
2777        my ( $tree, $n, $tip_priority ) = @_;
2778        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2779    
2780        my @tips;
2781        if ( newick_tip_count( $tree ) <= $n )
2782        {
2783            @tips = newick_tip_list( $tree );
2784        }
2785        else
2786        {
2787            $tip_priority ||= default_tip_priority( $tree );
2788            @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2789                    root_proximal_newick_subtrees( $tree, $n );
2790        }
2791    
2792        wantarray ? @tips : \@tips;
2793    }
2794    
2795    
2796    #-------------------------------------------------------------------------------
2797    #  Find subtree of size n representating vicinity of a tip:
2798    #
2799    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
2800    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
2801    #-------------------------------------------------------------------------------
2802    sub tip_neighborhood_representative_tree
2803    {
2804        my ( $tree, $tip, $n, $tip_priority ) = @_;
2805        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2806        newick_tip_in_tree( $tree, $tip ) or return undef;
2807    
2808        my $tree1 = copy_newick_tree( $tree );
2809        if ( newick_tip_count( $tree1 ) - 1 <= $n )
2810        {
2811            return prune_from_newick( $tree1, $tip )
2812        }
2813    
2814        $tree1 = reroot_newick_to_tip( $tree1, $tip );
2815        $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2816        my @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2817        newick_subtree( copy_newick_tree( $tree ), \@tips );
2818    }
2819    
2820    
2821    #-------------------------------------------------------------------------------
2822    #  Find n tips to represent tree lineages in vicinity of another tip.
2823    #  Default tip priority is short total branch length.
2824    #
2825    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2826    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2827    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2828    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2829    #-------------------------------------------------------------------------------
2830    sub tip_neighborhood_representative_tips
2831    {
2832        my ( $tree, $tip, $n, $tip_priority ) = @_;
2833        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2834        newick_tip_in_tree( $tree, $tip ) or return undef;
2835    
2836        my @tips = newick_tip_list( $tree );
2837        if ( newick_tip_count( $tree ) - 1 <= $n )
2838        {
2839            @tips = grep { $_ ne $tip } @tips;
2840        }
2841        else
2842        {
2843            my $tree1 = copy_newick_tree( $tree );
2844            $tree1 = reroot_newick_to_tip( $tree1, $tip );
2845            $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2846            @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2847        }
2848    
2849        wantarray ? @tips : \@tips;
2850    }
2851    
2852    
2853    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2854    #  Anonymous hash of the negative distance from root to each tip:
2855    #
2856    #   \%tip_priority = default_tip_priority( $tree )
2857    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2858    sub default_tip_priority
2859    {
2860        my ( $tree ) = @_;
2861        my $tip_distances = newick_tip_distances( $tree ) || {};
2862        return { map { $_ => -$tip_distances->{$_} } keys %$tip_distances };
2863    }
2864    
2865    
2866    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2867    #  Select a tip from a subtree base on a priority value:
2868    #
2869    #    $tip = representative_tip_of_newick_node( $node, \%tip_priority )
2870    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2871    sub representative_tip_of_newick_node
2872    {
2873        my ( $node, $tip_priority ) = @_;
2874        my ( $tip ) = sort { $b->[1] <=> $a->[1] }   # The best
2875                      map  { [ $_, $tip_priority->{ $_ } ] }
2876                      newick_tip_list( $node );
2877        $tip->[0];                                   # Label from label-priority pair
2878    }
2879    
2880    
2881    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2882    #  Find n subtrees focused around the root of a tree.  Typically each will
2883    #  then be reduced to a single tip to make a representative tree:
2884    #
2885    #   @subtrees = root_proximal_newick_subtrees( $tree, $n )
2886    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2887    sub root_proximal_newick_subtrees
2888    {
2889        my ( $tree, $n ) = @_;
2890        my $node_start_end = newick_branch_intervals( $tree );
2891        n_representative_branches( $n, $node_start_end );
2892    }
2893    
2894    
2895    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2896    #   @node_start_end = newick_branch_intervals( $tree )
2897    #  \@node_start_end = newick_branch_intervals( $tree )
2898    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2899    sub newick_branch_intervals
2900    {
2901        my ( $node, $parent_x ) = @_;
2902        $parent_x ||= 0;
2903        my ( $desc, undef, $dx ) = @$node;
2904        my $x = $parent_x + $dx;
2905        my $interval = [ $node, $parent_x, $desc && @$desc ? $x : 1e100 ];
2906        my @intervals = ( $interval,
2907                          map { &newick_branch_intervals( $_, $x ) } @$desc
2908                        );
2909        return wantarray ? @intervals : \@intervals;
2910    }
2911    
2912    
2913    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2914    #   @ids = n_representative_branches( $n,  @id_start_end )
2915    #   @ids = n_representative_branches( $n, \@id_start_end )
2916    #  \@ids = n_representative_branches( $n,  @id_start_end )
2917    #  \@ids = n_representative_branches( $n, \@id_start_end )
2918    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2919    sub n_representative_branches
2920    {
2921        my $n = shift;
2922        #  Sort intervals by start point:
2923        my @unprocessed = sort { $a->[1] <=> $b->[1] }
2924                          ( @_ == 1 ) ? @{ $_[0] } : @_;
2925        my @active = ();
2926        my ( $interval, $current_point );
2927        foreach $interval ( @unprocessed )
2928        {
2929            $current_point = $interval->[1];
2930            #  Filter out intervals that have ended.  This is N**2 in the number
2931            #  of representatives.  Fixing this would require maintaining a sorted
2932            #  active list.
2933            @active = grep { $_->[2] > $current_point } @active;
2934            push @active, $interval;
2935            last if ( @active >= $n );
2936        }
2937    
2938        my @ids = map { $_->[0] } @active;
2939        return wantarray() ? @ids : \@ids;
2940    }
2941    
2942    
2943  #===============================================================================  #===============================================================================
2944  #  #
2945  #  Tree writing and reading  #  Tree writing and reading
# Line 2553  Line 3107 
3107      my ( $fh, $close ) = open_input( $file );      my ( $fh, $close ) = open_input( $file );
3108      my $tree;      my $tree;
3109      my @lines = ();      my @lines = ();
3110      while ( defined( $_ = <$fh> ) )      foreach ( <$fh> )
3111      {      {
3112          chomp;          chomp;
3113          push @lines, $_;          push @lines, $_;
# Line 2575  Line 3129 
3129      my ( $fh, $close ) = open_input( $file );      my ( $fh, $close ) = open_input( $file );
3130      my @trees = ();      my @trees = ();
3131      my @lines = ();      my @lines = ();
3132      while ( defined( $_ = <$fh> ) )      foreach ( <$fh> )
3133      {      {
3134          chomp;          chomp;
3135          push @lines, $_;          push @lines, $_;
# Line 2762  Line 3316 
3316      #  Loop while it is a comment:      #  Loop while it is a comment:
3317      while ( substr( $s, $ind, 1 ) eq "[" ) {      while ( substr( $s, $ind, 1 ) eq "[" ) {
3318          $ind++;          $ind++;
3319            my $depth = 1;
3320            my $ind2  = $ind;
3321    
3322          #  Find end          #  Find end
3323          if ( substr( $s, $ind ) !~ /^([^]]*)\]/ ) {          while ( $depth > 0 )
3324            {
3325                if ( substr( $s, $ind2 ) =~ /^([^][]*\[)/ )     # nested [ ... ]
3326                {
3327                    $ind2 += length( $1 );  #  Points at char just past [
3328                    $depth++;               #  If nested comments are allowed
3329                }
3330                elsif ( substr( $s, $ind2 ) =~ /^([^][]*\])/ )  # close bracket
3331                {
3332                    $ind2 += length( $1 );  #  Points at char just past ]
3333                    $depth--;
3334                }
3335                else
3336                {
3337              treeParseError( "comment missing closing bracket '["              treeParseError( "comment missing closing bracket '["
3338                             . substr( $s, $ind ) . "'" )                             . substr( $s, $ind ) . "'" )
3339          }          }
3340          my $comment = $1;          }
3341    
3342          #  Save if it includes any "text"          my $comment = substr( $s, $ind, $ind2-$ind-1 );
3343          if ( $comment =~ m/\S/ ) { push @clist, $comment }          if ( $comment =~ m/\S/ ) { push @clist, $comment }
3344    
3345          $ind += length( $comment ) + 1;     #  Comment plus closing bracket          $ind = $ind2;
3346    
3347          #  Skip white space          #  Skip white space
3348          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }
# Line 2792  Line 3361 
3361  #===============================================================================  #===============================================================================
3362  #  Make a printer plot of a tree:  #  Make a printer plot of a tree:
3363  #  #
3364  #     $node   newick tree root node  #  printer_plot_newick( $node, $file, $width, $min_dx, $dy )
3365  #     $file   undef (= \*STDOUT), \*STDOUT, \*STDERR, or a file name.  #  printer_plot_newick( $node, $file, \%options )
3366  #     $width  the approximate characters for the tree without labels  #
3367  #     $min_dx the minimum horizontal branch length  #     $node   # newick tree root node
3368  #     $dy     the vertical space per taxon  #     $file   # undef = \*STDOUT, \*FH, or a file name.
3369    #     $width  # the approximate characters for the tree without labels (D = 68)
3370    #     $min_dx # the minimum horizontal branch length (D = 2)
3371    #     $dy     # the vertical space per taxon (D = 1, most compressed)
3372    #
3373    #  Options:
3374    #
3375    #    dy     => nat_number    # the vertical space per taxon
3376    #    chars  => key           # line drawing character set:
3377    #                            #     html_unicode
3378    #                            #     text (default)
3379    #    min_dx => whole_number  # the minimum horizontal branch length
3380    #    width  => whole_number  # approximate tree width without labels
3381  #  #
 #  printer_plot_newick( $node, $file (D=\*STDOUT), $width (D=68), $min_dx (D=2), $dy (D=1) )  
3382  #===============================================================================  #===============================================================================
3383  sub printer_plot_newick {  sub printer_plot_newick
3384      my ( $node, $file, $width, $min_dx, $dy ) = @_;  {
3385        my ( $node, $file, @opts ) = @_;
3386    
3387      my ( $fh, $close ) = open_output( $file );      my ( $fh, $close ) = open_output( $file );
3388      $fh or return;      $fh or return;
3389    
3390      print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";      my $html = $opts[0] && ref($opts[0]) eq 'HASH'
3391                            && $opts[0]->{ chars }
3392                            && $opts[0]->{ chars } =~ /html/;
3393        print $fh '<PRE>' if $html;
3394        print $fh join( "\n", text_plot_newick( $node, @opts ) ), "\n";
3395        print $fh "</PRE>\n" if $html;
3396    
3397      if ( $close ) { close $fh }      if ( $close ) { close $fh }
3398  }  }
3399    
3400    
3401  #===============================================================================  #===============================================================================
3402    #  Character sets for printer plot trees:
3403    #-------------------------------------------------------------------------------
3404    
3405    my %char_set =
3406      ( text1     => { space  => ' ',
3407                       horiz  => '-',
3408                       vert   => '|',
3409                       el_d_r => '/',
3410                       el_u_r => '\\',
3411                       el_d_l => '\\',
3412                       el_u_l => '/',
3413                       tee_l  => '+',
3414                       tee_r  => '+',
3415                       tee_u  => '+',
3416                       tee_d  => '+',
3417                       half_l => '-',
3418                       half_r => '-',
3419                       half_u => '|',
3420                       half_d => '|',
3421                       cross  => '+',
3422                     },
3423        text2     => { space  => ' ',
3424                       horiz  => '-',
3425                       vert   => '|',
3426                       el_d_r => '+',
3427                       el_u_r => '+',
3428                       el_d_l => '+',
3429                       el_u_l => '+',
3430                       tee_l  => '+',
3431                       tee_r  => '+',
3432                       tee_u  => '+',
3433                       tee_d  => '+',
3434                       half_l => '-',
3435                       half_r => '-',
3436                       half_u => '|',
3437                       half_d => '|',
3438                       cross  => '+',
3439                     },
3440        html_box  => { space  => '&nbsp;',
3441                       horiz  => '&#9472;',
3442                       vert   => '&#9474;',
3443                       el_d_r => '&#9484;',
3444                       el_u_r => '&#9492;',
3445                       el_d_l => '&#9488;',
3446                       el_u_l => '&#9496;',
3447                       tee_l  => '&#9508;',
3448                       tee_r  => '&#9500;',
3449                       tee_u  => '&#9524;',
3450                       tee_d  => '&#9516;',
3451                       half_l => '&#9588;',
3452                       half_r => '&#9590;',
3453                       half_u => '&#9589;',
3454                       half_d => '&#9591;',
3455                       cross  => '&#9532;',
3456                     },
3457        utf8_box  => { space  => ' ',
3458                       horiz  => chr(226) . chr(148) . chr(128),
3459                       vert   => chr(226) . chr(148) . chr(130),
3460                       el_d_r => chr(226) . chr(148) . chr(140),
3461                       el_u_r => chr(226) . chr(148) . chr(148),
3462                       el_d_l => chr(226) . chr(148) . chr(144),
3463                       el_u_l => chr(226) . chr(148) . chr(152),
3464                       tee_l  => chr(226) . chr(148) . chr(164),
3465                       tee_r  => chr(226) . chr(148) . chr(156),
3466                       tee_u  => chr(226) . chr(148) . chr(180),
3467                       tee_d  => chr(226) . chr(148) . chr(172),
3468                       half_l => chr(226) . chr(149) . chr(180),
3469                       half_r => chr(226) . chr(149) . chr(182),
3470                       half_u => chr(226) . chr(149) . chr(181),
3471                       half_d => chr(226) . chr(149) . chr(183),
3472                       cross  => chr(226) . chr(148) . chr(188),
3473                     },
3474      );
3475    
3476    %{ $char_set{ html1 } } = %{ $char_set{ text1 } };
3477    $char_set{ html1 }->{ space } = '&nbsp;';
3478    
3479    %{ $char_set{ html2 } } = %{ $char_set{ text2 } };
3480    $char_set{ html2 }->{ space } = '&nbsp;';
3481    
3482    #  Define some synonyms
3483    
3484    $char_set{ html } = $char_set{ html_box };
3485    $char_set{ line } = $char_set{ utf8_box };
3486    $char_set{ symb } = $char_set{ utf8_box };
3487    $char_set{ text } = $char_set{ text1 };
3488    $char_set{ utf8 } = $char_set{ utf8_box };
3489    
3490    #  Define tree formats and synonyms
3491    
3492    my %tree_format =
3493        ( text         => 'text',
3494          tree_tab_lbl => 'tree_tab_lbl',
3495          tree_lbl     => 'tree_lbl',
3496          chrlist_lbl  => 'chrlist_lbl',
3497          raw          => 'chrlist_lbl',
3498        );
3499    
3500    #===============================================================================
3501  #  Make a text plot of a tree:  #  Make a text plot of a tree:
3502  #  #
3503  #     $node   newick tree root node  #  @lines = text_plot_newick( $node, $width, $min_dx, $dy )
3504  #     $width  the approximate characters for the tree without labels  #  @lines = text_plot_newick( $node, \%options )
3505  #     $min_dx the minimum horizontal branch length  #
3506  #     $dy     the vertical space per taxon  #     $node   # newick tree root node
3507    #     $width  # the approximate characters for the tree without labels (D = 68)
3508    #     $min_dx # the minimum horizontal branch length (D = 2)
3509    #     $dy     # the vertical space per taxon (D = 1, most compressed)
3510    #
3511    #  Options:
3512    #
3513    #    chars  => keyword       # the output character set for the tree
3514    #    dy     => nat_number    # the vertical space per taxon
3515    #    format => keyword       # output format of each line
3516    #    min_dx => whole_number  # the minimum horizontal branch length
3517    #    width  => whole_number  # approximate tree width without labels
3518    #
3519    #  Character sets:
3520    #
3521    #    html       #  synonym of html1
3522    #    html_box   #  html encoding of unicode box drawing characters
3523    #    html1      #  text1 with nonbreaking spaces
3524    #    html2      #  text2 with nonbreaking spaces
3525    #    line       #  synonym of utf8_box
3526    #    raw        #  pass out the internal representation
3527    #    symb       #  synonym of utf8_box
3528    #    text       #  synonym of text1 (Default)
3529    #    text1      #  ascii characters: - + | / \ and space
3530    #    text2      #  ascii characters: - + | + + and space
3531    #    utf8       #  synonym of utf8_box
3532    #    utf8_box   #  utf8 encoding of unicode box drawing characters
3533    #
3534    #  Formats for row lines:
3535    #
3536    #    text           #    $textstring              # Default
3537    #    tree_tab_lbl   #    $treestr \t $labelstr
3538    #    tree_lbl       # [  $treestr,  $labelstr ]
3539    #    chrlist_lbl    # [ \@treechar, $labelstr ]   # Forced with raw chars
3540    #    raw            #  synonym of chrlist_lbl
3541  #  #
 #  @textlines = text_plot_newick( $node, $width (D=68), $min_dx (D=2), $dy (D=1) )  
3542  #===============================================================================  #===============================================================================
3543  sub text_plot_newick {  sub text_plot_newick
3544      my ( $node, $width, $min_dx, $dy ) = @_;  {
3545        my $node = shift @_;
3546      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";
3547      defined( $min_dx ) and ( $min_dx >=  0 ) or $min_dx =  2;  
3548      defined(     $dy ) and (     $dy >=  1 ) or     $dy =  1;      my ( $opts, $width, $min_dx, $dy, $chars, $fmt );
3549      defined( $width  )                       or  $width = 68;      if ( $_[0] && ref $_[0] eq 'HASH' )
3550        {
3551            $opts = shift;
3552        }
3553        else
3554        {
3555            ( $width, $min_dx, $dy ) = @_;
3556            $opts = {};
3557        }
3558    
3559        $chars = $opts->{ chars } || '';
3560        my $charH;
3561        $charH = $char_set{ $chars } || $char_set{ 'text1' } if ( $chars ne 'raw' );
3562        my $is_box = $charH eq $char_set{ html_box }
3563                  || $charH eq $char_set{ utf8_box }
3564                  || $chars eq 'raw';
3565    
3566        $fmt = ( $chars eq 'raw' ) ? 'chrlist_lbl' : $opts->{ format };
3567        $fmt = $tree_format{ $fmt || '' } || 'text';
3568    
3569        $dy    ||= $opts->{ dy     } ||  1;
3570        $width ||= $opts->{ width  } || 68;
3571        $min_dx  = $opts->{ min_dx } if ( ! defined $min_dx || $min_dx < 0 );
3572        $min_dx  = $is_box ? 1 : 2   if ( ! defined $min_dx || $min_dx < 0 );
3573    
3574        #  Layout the tree:
3575    
3576      $min_dx = int( $min_dx );      $min_dx = int( $min_dx );
3577      $dy     = int( $dy );      $dy     = int( $dy );
# Line 2835  Line 3580 
3580      my $hash = {};      my $hash = {};
3581      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );
3582    
3583      # dump_tree_hash( $node, $hash ); exit;      #  Generate the lines of the tree-one by-one:
   
     #  Generate the lines of the tree one by one:  
3584    
3585      my ( $y1, $y2 ) = @{ $hash->{ $node } };      my ( $y1, $y2 ) = @{ $hash->{ $node } };
3586      map { text_tree_row( $node, $hash, $_, "", "+" ) } ( $y1 .. $y2 );      my @lines;
3587        foreach ( ( $y1 .. $y2 ) )
3588        {
3589            my $line = text_tree_row( $node, $hash, $_, [], 'tee_l', $dy >= 2 );
3590            my $lbl  = '';
3591            if ( @$line )
3592            {
3593                if ( $line->[-1] eq '' ) { pop @$line; $lbl = pop @$line }
3594                #  Translate tree characters
3595                @$line = map { $charH->{ $_ } } @$line if $chars ne 'raw';
3596            }
3597    
3598            # Convert to requested output format:
3599    
3600            push @lines, $fmt eq 'text'         ? join( '', @$line, ( $lbl ? " $lbl" : () ) )
3601                       : $fmt eq 'text_tab_lbl' ? join( '', @$line, "\t", $lbl )
3602                       : $fmt eq 'tree_lbl'     ? [ join( '', @$line ), $lbl ]
3603                       : $fmt eq 'chrlist_lbl'  ? [ $line, $lbl ]
3604                       :                          ();
3605        }
3606    
3607        # if ( $cells )
3608        # {
3609        #     my $nmax = 0;
3610        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
3611        #     foreach ( @lines )
3612        #     {
3613        #         @$_ = map { "<TD>$_</TD>" } @$_;
3614        #         my $span = $nmax - @$_ + 1;
3615        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
3616        #     }
3617        # }
3618        # elsif ( $tables )
3619        # {
3620        #     my $nmax = 0;
3621        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
3622        #     foreach ( @lines )
3623        #     {
3624        #         @$_ = map { "<TD>$_</TD>" } @$_;
3625        #         my $span = $nmax - @$_ + 1;
3626        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
3627        #     }
3628        # }
3629    
3630        wantarray ? @lines : \@lines;
3631  }  }
3632    
3633    
3634  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3635  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy )  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd )
3636  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3637  sub layout_printer_plot {  sub layout_printer_plot
3638      my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy ) = @_;  {
3639        my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd ) = @_;
3640      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";
3641      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";
3642    
3643      my $dx = newick_x( $node );      my $dx = newick_x( $node );
3644      if ( defined( $dx ) ) {      if ( defined( $dx ) ) {
3645          $dx *= $x_scale;          $dx *= $x_scale;
3646          $dx >= $min_dx or $dx = $min_dx;          $dx = $min_dx if $dx < $min_dx;
3647      }      }
3648      else {      else {
3649          $dx = ( $x0 > 0 ) ? $min_dx : 0;          $dx = ( $x0 > 0 ) ? $min_dx : 0;
# Line 2881  Line 3670 
3670          $ymax = $y0;          $ymax = $y0;
3671    
3672          foreach ( @dl ) {          foreach ( @dl ) {
3673              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy );              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy,
3674                                                              ( 2*@ylist < @dl ? 0.5001 : 0.4999 )
3675                                                            );
3676              push @ylist, $yi;              push @ylist, $yi;
3677              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }
3678          }          }
# Line 2891  Line 3682 
3682    
3683          $yn1 = $ylist[ 0];          $yn1 = $ylist[ 0];
3684          $yn2 = $ylist[-1];          $yn2 = $ylist[-1];
3685          $y = int( 0.5 * ( $yn1 + $yn2 ) + 0.4999 );          $y   = int( 0.5 * ( $yn1 + $yn2 ) + ( $yrnd || 0.4999 ) );
3686    
3687            #  Handle special case of internal node label. Put it between subtrees.
3688    
3689            if ( ( $dy >= 2 ) && newick_lbl( $node ) && ( @dl > 1 ) ) {
3690                #  Find the descendents $i1 and $i2 to put the branch between
3691                my $i2 = 1;
3692                while ( ( $i2+1 < @ylist ) && ( $ylist[$i2] < $y ) ) { $i2++ }
3693                my $i1 = $i2 - 1;
3694                #  Get bottom of subtree1 and top of subtree2:
3695                my $ymax1 = $hash->{ $dl[ $i1 ] }->[ 1 ];
3696                my $ymin2 = $hash->{ $dl[ $i2 ] }->[ 0 ];
3697                #  Midway between bottom of subtree1 and top of subtree2, with
3698                #  preferred rounding direction
3699                $y = int( 0.5 * ( $ymax1 + $ymin2 ) + ( $yrnd || 0.4999 ) );
3700            }
3701      }      }
3702    
3703      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );
# Line 2901  Line 3707 
3707  }  }
3708    
3709    
3710  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #  What symbol do we get if we add a leftward line to some other symbol?
 #  Debug routine  
 #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
 sub dump_tree {  
     my ( $node, $prefix ) = @_;  
     defined( $prefix ) or $prefix = "";  
     print STDERR $prefix, join(", ", @$node), "\n";  
     my @dl = $node->[0] ? @{$node->[0]} : ();  
     foreach ( @dl ) { dump_tree( $_, $prefix . "  " ) }  
     $prefix or print STDERR "\n";  
 }  
   
   
 #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
 #  Debug routine  
 #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
 sub dump_tree_hash {  
     my ( $node, $hash, $prefix ) = @_;  
     defined( $prefix ) or print STDERR "node; [ y1, y2, x0, x, y, yn1, yn2 ]\n" and $prefix = "";  
     print STDERR $prefix, join(", ", @$node), "; ", join(", ", @{ $hash->{ $node } } ), "\n";  
     my @dl = $node->[0] ? @{$node->[0]} : ();  
     foreach (@dl) { dump_tree_hash( $_, $hash, $prefix . "  " ) }  
 }  
3711    
3712    my %with_left_line = ( space  => 'half_l',
3713                           horiz  => 'horiz',
3714                           vert   => 'tee_l',
3715                           el_d_r => 'tee_d',
3716                           el_u_r => 'tee_u',
3717                           el_d_l => 'el_d_l',
3718                           el_u_l => 'el_u_l',
3719                           tee_l  => 'tee_l',
3720                           tee_r  => 'cross',
3721                           tee_u  => 'tee_u',
3722                           tee_d  => 'tee_d',
3723                           half_l => 'half_l',
3724                           half_r => 'horiz',
3725                           half_u => 'el_u_l',
3726                           half_d => 'el_d_l',
3727                           cross  => 'cross',
3728                         );
3729    
3730  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3731  #  $line = text_tree_row( $node, $hash, $row, $line, $symb )  #  Produce a description of one line of a printer plot tree.
3732    #
3733    #  \@line = text_tree_row( $node, $hash, $row, \@line, $symb, $ilbl )
3734    #
3735    #     \@line is the character descriptions accumulated so far, one per array
3736    #          element, except for a label, which can be any number of characters.
3737    #          Labels are followed by an empty string, so if $line->[-1] eq '',
3738    #          then $line->[-2] is a label. The calling program translates the
3739    #          symbol names to output characters.
3740    #
3741    #     \@node is a newick tree node
3742    #     \%hash contains tree layout information
3743    #      $row  is the row number (y value) that we are building
3744    #      $symb is the plot symbol proposed for the current x and y position
3745    #      $ilbl is true if internal node labels are allowed
3746    #
3747  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3748  sub text_tree_row {  sub text_tree_row
3749      my ( $node, $hash, $row, $line, $symb ) = @_;  {
3750        my ( $node, $hash, $row, $line, $symb, $ilbl ) = @_;
3751    
3752      my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };      my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };
3753      if ( $row < $y1 || $row > $y2 ) { return $line }      if ( $row < $y1 || $row > $y2 ) { return $line }
3754    
3755      if ( length( $line ) < $x0 ) { $line .= " " x ( $x0 - length( $line ) ) }      if ( @$line < $x0 ) { push @$line, ('space') x ( $x0 - @$line ) }
3756    
3757      if ( $row == $y ) {      if ( $row == $y ) {
3758          $line = substr( $line, 0, $x0 ) . $symb . (( $x > $x0 ) ? "-" x ($x - $x0) : "");          while ( @$line > $x0 ) { pop @$line }  # Actually 0-1 times
3759            push @$line, $symb,
3760                         ( ( $x > $x0 ) ? ('horiz') x ($x - $x0) : () );
3761      }      }
3762    
3763      elsif ( $row > $yn1 && $row < $yn2 ) {      elsif ( $row > $yn1 && $row < $yn2 ) {
3764          if ( length( $line ) < $x ) { $line .= " " x ( $x - length( $line ) ) . "|" }          if ( @$line < $x ) { push @$line, ('space') x ( $x - @$line ), 'vert' }
3765          else { substr( $line, $x ) = "|" }          else               { $line->[$x] = 'vert' }
3766      }      }
3767    
3768      my @dl = newick_desc_list( $node );      my @dl = newick_desc_list( $node );
3769    
3770      if ( @dl < 1 ) {      if ( @dl < 1 ) { push @$line, ( newick_lbl( $node ) || '' ), '' }
         $line .= " " . $node->[1];  
     }  
3771    
3772      else {      else {
3773          my @list = map { [ $_, "+" ] } @dl;  #  Print symbol for line          my @list = map { [ $_, 'tee_r' ] } @dl;  # Line to the right
3774          $list[ 0]->[1] = "/";          if ( @list > 1 ) { #  Fix top and bottom sympbols
3775          $list[-1]->[1] = "\\";              $list[ 0]->[1] = 'el_d_r';
3776                $list[-1]->[1] = 'el_u_r';
3777            }
3778            elsif ( @list ) {  # Only one descendent
3779                $list[ 0]->[1] = 'half_r';
3780            }
3781          foreach ( @list ) {          foreach ( @list ) {
3782              my ( $n, $s ) = @$_;              my ( $n, $s ) = @$_;
3783              if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {              if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {
3784                  $line = text_tree_row( $n, $hash, $row, $line, $s );                  $line = text_tree_row( $n, $hash, $row, $line, $s, $ilbl );
3785              }              }
3786           }           }
3787    
3788          if ( $row == $y ) { substr( $line, $x, 1 ) = "+" }          if ( $row == $y ) {
3789                $line->[$x] = ( $line->[$x] eq 'horiz' ) ? 'tee_l'
3790                                                         : $with_left_line{ $line->[$x] };
3791                push( @$line, newick_lbl( $node ), '' ) if $ilbl && newick_lbl( $node );
3792            }
3793      }      }
3794    
3795      return $line;      return $line;
3796  }  }
3797    
3798    
3799    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3800    #  Debug routine
3801    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3802    sub dump_tree {
3803        my ( $node, $prefix ) = @_;
3804        defined( $prefix ) or $prefix = "";
3805        print STDERR $prefix, join(", ", @$node), "\n";
3806        my @dl = $node->[0] ? @{$node->[0]} : ();
3807        foreach ( @dl ) { dump_tree( $_, $prefix . "  " ) }
3808        $prefix or print STDERR "\n";
3809    }
3810    
3811    
3812    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3813    #  Debug routine
3814    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3815    sub dump_tree_hash {
3816        my ( $node, $hash, $prefix ) = @_;
3817        defined( $prefix ) or print STDERR "node; [ y1, y2, x0, x, y, yn1, yn2 ]\n" and $prefix = "";
3818        print STDERR $prefix, join(", ", @$node), "; ", join(", ", @{ $hash->{ $node } } ), "\n";
3819        my @dl = $node->[0] ? @{$node->[0]} : ();
3820        foreach (@dl) { dump_tree_hash( $_, $hash, $prefix . "  " ) }
3821    }
3822    
3823    
3824  #===============================================================================  #===============================================================================
3825  #  Open an input file stream:  #  Open an input file stream:
3826  #  #
# Line 3012  Line 3862 
3862      return undef;      return undef;
3863  }  }
3864    
3865    
3866    #===============================================================================
3867    #  Some subroutines copied from gjolists
3868    #===============================================================================
3869    #  Return the common prefix of two lists:
3870    #
3871    #  @common = common_prefix( \@list1, \@list2 )
3872    #-----------------------------------------------------------------------------
3873    sub common_prefix
3874    {
3875        my ($l1, $l2) = @_;
3876        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
3877        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
3878    
3879        my $i = 0;
3880        my $l1_i;
3881        while ( defined( $l1_i = $l1->[$i] ) && $l1_i eq $l2->[$i] ) { $i++ }
3882    
3883        return @$l1[ 0 .. ($i-1) ];  # perl handles negative range
3884    }
3885    
3886    
3887    #-----------------------------------------------------------------------------
3888    #  Return the unique suffixes of each of two lists:
3889    #
3890    #  ( \@suffix1, \@suffix2 ) = unique_suffixes( \@list1, \@list2 )
3891    #-----------------------------------------------------------------------------
3892    sub unique_suffixes
3893    {
3894        my ($l1, $l2) = @_;
3895        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
3896        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
3897    
3898        my $i = 0;
3899        my @l1 = @$l1;
3900        my @l2 = @$l2;
3901        my $l1_i;
3902        while ( defined( $l1_i = $l1[$i] ) && $l1_i eq $l2[$i] ) { $i++ }
3903    
3904        splice @l1, 0, $i;
3905        splice @l2, 0, $i;
3906        return ( \@l1, \@l2 );
3907    }
3908    
3909    
3910    #-------------------------------------------------------------------------------
3911    #  List of values duplicated in a list (stable in order by second occurance):
3912    #
3913    #  @dups = duplicates( @list )
3914    #-------------------------------------------------------------------------------
3915    sub duplicates
3916    {
3917        my %cnt = ();
3918        grep { ++$cnt{$_} == 2 } @_;
3919    }
3920    
3921    
3922    #-------------------------------------------------------------------------------
3923    #  Randomize the order of a list:
3924    #
3925    #  @random = random_order( @list )
3926    #-------------------------------------------------------------------------------
3927    sub random_order
3928    {
3929        my ( $i, $j );
3930        for ( $i = @_ - 1; $i > 0; $i-- )
3931        {
3932            $j = int( ($i+1) * rand() );
3933            ( $_[$i], $_[$j] ) = ( $_[$j], $_[$i] ); # Interchange i and j
3934        }
3935    
3936       @_;
3937    }
3938    
3939    
3940    #-----------------------------------------------------------------------------
3941    #  Intersection of two or more sets:
3942    #
3943    #  @intersection = intersection( \@set1, \@set2, ... )
3944    #-----------------------------------------------------------------------------
3945    sub intersection
3946    {
3947        my $set = shift;
3948        my @intersection = @$set;
3949    
3950        foreach $set ( @_ )
3951        {
3952            my %set = map { $_ => 1 } @$set;
3953            @intersection = grep { exists $set{ $_ } } @intersection;
3954        }
3955    
3956        @intersection;
3957    }
3958    
3959    
3960    #-----------------------------------------------------------------------------
3961    #  Elements in set 1, but not set 2:
3962    #
3963    #  @difference = set_difference( \@set1, \@set2 )
3964    #-----------------------------------------------------------------------------
3965    sub set_difference
3966    {
3967        my ($set1, $set2) = @_;
3968        my %set2 = map { $_ => 1 } @$set2;
3969        grep { ! ( exists $set2{$_} ) } @$set1;
3970    }
3971    
3972    
3973  1;  1;

Legend:
Removed from v.1.8  
changed lines
  Added in v.1.16

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3