[Bio] / FigKernelPackages / gjonewicklib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjonewicklib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.8, Sun Feb 11 18:25:48 2007 UTC revision 1.20, Sat Aug 14 23:58:51 2010 UTC
# Line 1  Line 1 
1    # This is a SAS component.
2    
3  #  #
4  # Copyright (c) 2003-2007 University of Chicago and Fellowship  # Copyright (c) 2003-2010 University of Chicago and Fellowship
5  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
6  #  #
7  # This file is part of the SEED Toolkit.  # This file is part of the SEED Toolkit.
# Line 77  Line 79 
79  #  Tree format interconversion:  #  Tree format interconversion:
80  #===============================================================================  #===============================================================================
81  #  #
82    #  $bool      = is_overbeek_tree( $tree )
83    #  $bool      = is_gjonewick_tree( $tree )
84    #
85  #  $gjonewick = overbeek_to_gjonewick( $overbeek )  #  $gjonewick = overbeek_to_gjonewick( $overbeek )
86  #  $overbeek  = gjonewick_to_overbeek( $gjonewick )  #  $overbeek  = gjonewick_to_overbeek( $gjonewick )
87  #  #
# Line 107  Line 112 
112  #  set_newick_c4( $noderef, $listref )  #  set_newick_c4( $noderef, $listref )
113  #  set_newick_c5( $noderef, $listref )  #  set_newick_c5( $noderef, $listref )
114  #  set_newick_desc_list( $noderef, @desclist )  #  set_newick_desc_list( $noderef, @desclist )
115  #  set_newick_desc_i( $noderef1, $i, $noderef2 )  #  set_newick_desc_i( $noderef1, $i, $noderef2 )  # 1-based numbering
116  #  #
117  #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid  #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid
118  #  #
# Line 118  Line 123 
123  #  #
124  #  $n       = newick_tip_count( $noderef )  #  $n       = newick_tip_count( $noderef )
125  #  @tiprefs = newick_tip_ref_list( $noderef )  #  @tiprefs = newick_tip_ref_list( $noderef )
126    # \@tiprefs = newick_tip_ref_list( $noderef )
127  #  @tips    = newick_tip_list( $noderef )  #  @tips    = newick_tip_list( $noderef )
128    # \@tips    = newick_tip_list( $noderef )
129    #
130  #  $tipref  = newick_first_tip_ref( $noderef )  #  $tipref  = newick_first_tip_ref( $noderef )
131  #  $tip     = newick_first_tip( $noderef )  #  $tip     = newick_first_tip( $noderef )
132    #
133  #  @tips    = newick_duplicated_tips( $noderef )  #  @tips    = newick_duplicated_tips( $noderef )
134    # \@tips    = newick_duplicated_tips( $noderef )
135    #
136  #  $bool    = newick_tip_in_tree( $noderef, $tipname )  #  $bool    = newick_tip_in_tree( $noderef, $tipname )
137    #
138  #  @tips    = newick_shared_tips( $tree1, $tree2 )  #  @tips    = newick_shared_tips( $tree1, $tree2 )
139    # \@tips    = newick_shared_tips( $tree1, $tree2 )
140  #  #
141  #  $length  = newick_tree_length( $noderef )  #  $length  = newick_tree_length( $noderef )
142    #
143    #  %tip_distances = newick_tip_distances( $noderef )
144    # \%tip_distances = newick_tip_distances( $noderef )
145    #
146  #  $xmax    = newick_max_X( $noderef )  #  $xmax    = newick_max_X( $noderef )
147  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )
148  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )  #  ( $tipname, $xmax ) = newick_most_distant_tip_name( $noderef )
149    #
150    #  Provide a standard name by which two trees can be compared for same topology
151    #
152    #  $stdname = std_tree_name( $tree )
153  #  #
154  #  Tree tip insertion point (tip is on branch of length x that  #  Tree tip insertion point (tip is on branch of length x that
155  #  is inserted into branch connecting node1 and node2, a distance  #  is inserted into branch connecting node1 and node2, a distance
156  #  x1 from node1 and x2 from node2):  #  x1 from node1 and x2 from node2):
157  #  #
158  #  [ $node1, $x1, $node2, $x2, $x ]  #  [ $node1, $x1, $node2, $x2, $x ] = newick_tip_insertion_point( $tree, $tip )
 #           = newick_tip_insertion_point( $tree, $tip )  
159  #  #
160  #  Standardized label for a node in terms of intersection of 3 lowest sorting  #  Standardized label for a node in terms of intersection of 3 lowest sorting
161  #  tips (sort is lower case):  #  tips (sort is lower case):
162  #  #
163  #  @TipOrTips = std_node_name( $Tree, $Node )  #  @TipOrTips = std_node_name( $tree, $node )
164  #  #
165  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
166  #  Paths from root of tree:  #  Paths from root of tree:
# Line 189  Line 209 
209  #  $n_changed = newick_set_all_branches( $node, $x )  #  $n_changed = newick_set_all_branches( $node, $x )
210  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
211  #  $node      = newick_rescale_branches( $node, $factor )  #  $node      = newick_rescale_branches( $node, $factor )
212    #  $node      = newick_modify_branches( $node, \&function )
213    #  $node      = newick_modify_branches( $node, \&function, \@func_parms )
214  #  #
215  #  Modify comments:  #  Modify comments:
216  #  #
# Line 206  Line 228 
228  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )
229  #  $newtree = reroot_newick_to_node( $tree, @node )  #  $newtree = reroot_newick_to_node( $tree, @node )
230  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
231  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
232    #  $newtree = reroot_newick_to_midpoint( $tree )           # unweighted
233    #  $newtree = reroot_newick_to_midpoint_w( $tree )         # weight by tips
234  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted
235  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips
236  #  $newtree = uproot_tip_rooted_newick( $tree )  #  $newtree = uproot_tip_rooted_newick( $tree )
237  #  $newtree = uproot_newick( $tree )  #  $newtree = uproot_newick( $tree )
238  #  #
239  #  $newtree = prune_from_newick( $tree, $tip )  #  $newtree = prune_from_newick( $tree, $tip )
240    #  $newtree = rooted_newick_subtree( $tree,  @tips )
241    #  $newtree = rooted_newick_subtree( $tree, \@tips )
242  #  $newtree = newick_subtree( $tree,  @tips )  #  $newtree = newick_subtree( $tree,  @tips )
243  #  $newtree = newick_subtree( $tree, \@tips )  #  $newtree = newick_subtree( $tree, \@tips )
244    #  $newtree = newick_covering_subtree( $tree,  @tips )
245    #  $newtree = newick_covering_subtree( $tree, \@tips )
246  #  #
247  #  $newtree = collapse_zero_length_branches( $tree )  #  $newtree = collapse_zero_length_branches( $tree )
248  #  #
# Line 222  Line 250 
250  #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )  #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
251  #  #
252  #===============================================================================  #===============================================================================
253    #  Tree neighborhood: subtree of n tips to represent a larger tree.
254    #===============================================================================
255    #
256    #  Focus around root:
257    #
258    #  $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
259    #  $subtree = root_neighborhood_representative_tree( $tree, $n )
260    #  @tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
261    #  @tips    = root_neighborhood_representative_tips( $tree, $n )
262    # \@tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
263    # \@tips    = root_neighborhood_representative_tips( $tree, $n )
264    #
265    #  Focus around a tip insertion point (the tip is not in the subtree):
266    #
267    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
268    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
269    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
270    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
271    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
272    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
273    #
274    #===============================================================================
275  #  Tree reading and writing:  #  Tree reading and writing:
276  #===============================================================================  #===============================================================================
277    #  Write machine-readable trees:
278  #  #
279  #   writeNewickTree( $tree )  #   writeNewickTree( $tree )
280  #   writeNewickTree( $tree, $file )  #   writeNewickTree( $tree, $file )
# Line 231  Line 282 
282  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O
283  #  $treestring = swriteNewickTree( $tree )  #  $treestring = swriteNewickTree( $tree )
284  #  $treestring = formatNewickTree( $tree )  #  $treestring = formatNewickTree( $tree )
285    #
286    #  Write human-readable trees:
287    #
288  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )
289  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )
290  #  #
291    #  Read trees:
292    #
293  #  $tree  = read_newick_tree( $file )  # reads to a semicolon  #  $tree  = read_newick_tree( $file )  # reads to a semicolon
294  #  @trees = read_newick_trees( $file ) # reads to end of file  #  @trees = read_newick_trees( $file ) # reads to end of file
295  #  $tree  = parse_newick_tree_str( $string )  #  $tree  = parse_newick_tree_str( $string )
# Line 243  Line 299 
299    
300  use Carp;  use Carp;
301  use Data::Dumper;  use Data::Dumper;
302    use strict;
303    
304  require Exporter;  require Exporter;
305    
306  our @ISA = qw(Exporter);  our @ISA = qw(Exporter);
307  our @EXPORT = qw(  our @EXPORT = qw(
308            is_overbeek_tree
309            is_gjonewick_tree
310          overbeek_to_gjonewick          overbeek_to_gjonewick
311          gjonewick_to_overbeek          gjonewick_to_overbeek
   
312          newick_is_valid          newick_is_valid
313          newick_is_rooted          newick_is_rooted
314          newick_is_unrooted          newick_is_unrooted
315          tree_rooted_on_tip          tree_rooted_on_tip
316          newick_is_bifurcating          newick_is_bifurcating
317          newick_tip_count          newick_tip_count
318            newick_tip_ref_list
319          newick_tip_list          newick_tip_list
320    
321          newick_first_tip          newick_first_tip
322          newick_duplicated_tips          newick_duplicated_tips
323          newick_tip_in_tree          newick_tip_in_tree
324          newick_shared_tips          newick_shared_tips
325    
326          newick_tree_length          newick_tree_length
327            newick_tip_distances
328          newick_max_X          newick_max_X
329          newick_most_distant_tip_ref          newick_most_distant_tip_ref
330          newick_most_distant_tip_name          newick_most_distant_tip_name
331    
332          std_newick_name          newick_tip_insertion_point
333    
334            std_tree_name
335    
336          path_to_tip          path_to_tip
337          path_to_named_node          path_to_named_node
# Line 290  Line 353 
353          newick_set_all_branches          newick_set_all_branches
354          newick_fix_negative_branches          newick_fix_negative_branches
355          newick_rescale_branches          newick_rescale_branches
356            newick_modify_branches
357    
358          newick_strip_comments          newick_strip_comments
359    
# Line 305  Line 369 
369          reroot_newick_next_to_tip          reroot_newick_next_to_tip
370          reroot_newick_to_node          reroot_newick_to_node
371          reroot_newick_to_node_ref          reroot_newick_to_node_ref
372            reroot_newick_between_nodes
373            reroot_newick_to_midpoint
374            reroot_newick_to_midpoint_w
375          reroot_newick_to_approx_midpoint          reroot_newick_to_approx_midpoint
376          reroot_newick_to_approx_midpoint_w          reroot_newick_to_approx_midpoint_w
377          uproot_tip_rooted_newick          uproot_tip_rooted_newick
378          uproot_newick          uproot_newick
379    
380          prune_from_newick          prune_from_newick
381            rooted_newick_subtree
382          newick_subtree          newick_subtree
383            newick_covering_subtree
384          collapse_zero_length_branches          collapse_zero_length_branches
385    
386          newick_insert_at_node          newick_insert_at_node
387          newick_insert_between_nodes          newick_insert_between_nodes
388    
389            root_neighborhood_representative_tree
390            root_neighborhood_representative_tips
391            tip_neighborhood_representative_tree
392            tip_neighborhood_representative_tips
393    
394          writeNewickTree          writeNewickTree
395          fwriteNewickTree          fwriteNewickTree
396          strNewickTree          strNewickTree
# Line 362  Line 436 
436          );          );
437    
438    
 use gjolists qw(  
         common_prefix  
         unique_suffixes  
   
         duplicates  
         random_order  
   
         intersection  
         set_difference  
         );  
   
   
 use strict;  
   
   
439  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
440  #  Internally used definitions  #  Internally used definitions
441  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 386  Line 445 
445    
446    
447  #===============================================================================  #===============================================================================
448  #  Interconvert Overbeek and gjonewick trees:  #  Interconvert overbeek and gjonewick trees:
449  #===============================================================================  #===============================================================================
450    
451    sub is_overbeek_tree { array_ref( $_[0] ) && array_ref( $_[0]->[2] ) }
452    
453    sub is_gjonewick_tree { array_ref( $_[0] ) && array_ref( $_[0]->[0] ) }
454    
455  sub overbeek_to_gjonewick  sub overbeek_to_gjonewick
456  {  {
457      return () unless ref( $_[0] ) eq 'ARRAY';      return () unless ref( $_[0] ) eq 'ARRAY';
# Line 408  Line 471 
471      return $node;      return $node;
472  }  }
473    
474    
475  #===============================================================================  #===============================================================================
476  #  Extract tree structure values:  #  Extract tree structure values:
477  #===============================================================================  #===============================================================================
# Line 428  Line 492 
492  #  #
493  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
494    
495  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]  sub newick_desc_ref { ref($_[0]) ? $_[0]->[0] : Carp::confess() }  # = ${$_[0]}[0]
496  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }
497  sub newick_x        { $_[0]->[2] }  sub newick_x        { ref($_[0]) ? $_[0]->[2] : Carp::confess() }
498  sub newick_c1       { $_[0]->[3] }  sub newick_c1       { ref($_[0]) ? $_[0]->[3] : Carp::confess() }
499  sub newick_c2       { $_[0]->[4] }  sub newick_c2       { ref($_[0]) ? $_[0]->[4] : Carp::confess() }
500  sub newick_c3       { $_[0]->[5] }  sub newick_c3       { ref($_[0]) ? $_[0]->[5] : Carp::confess() }
501  sub newick_c4       { $_[0]->[6] }  sub newick_c4       { ref($_[0]) ? $_[0]->[6] : Carp::confess() }
502  sub newick_c5       { $_[0]->[7] }  sub newick_c5       { ref($_[0]) ? $_[0]->[7] : Carp::confess() }
503    
504  sub newick_desc_list {  sub newick_desc_list {
505      my $node = $_[0];      my $node = $_[0];
# Line 641  Line 705 
705  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
706  #  List of tip nodes:  #  List of tip nodes:
707  #  #
708  #  @tips = newick_tip_ref_list( $node )  #  @tips = newick_tip_ref_list( $noderef )
709    # \@tips = newick_tip_ref_list( $noderef )
710  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
711  sub newick_tip_ref_list {  sub newick_tip_ref_list {
712      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
# Line 658  Line 723 
723          push @list, newick_tip_ref_list( $_, 1 );          push @list, newick_tip_ref_list( $_, 1 );
724      }      }
725    
726      @list;      wantarray ? @list : \@list;
727  }  }
728    
729    
# Line 666  Line 731 
731  #  List of tips:  #  List of tips:
732  #  #
733  #  @tips = newick_tip_list( $node )  #  @tips = newick_tip_list( $node )
734    # \@tips = newick_tip_list( $node )
735  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
736  sub newick_tip_list {  sub newick_tip_list {
737      map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );      my @tips = map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );
738        wantarray ? @tips : \@tips;
739  }  }
740    
741    
# Line 707  Line 774 
774  #  List of duplicated tip labels.  #  List of duplicated tip labels.
775  #  #
776  #  @tips = newick_duplicated_tips( $node )  #  @tips = newick_duplicated_tips( $node )
777    # \@tips = newick_duplicated_tips( $node )
778  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
779  sub newick_duplicated_tips {  sub newick_duplicated_tips {
780      gjolists::duplicates( newick_tip_list( $_[0] ) );      my @tips = &duplicates( newick_tip_list( $_[0] ) );
781        wantarray ? @tips : \@tips;
782  }  }
783    
784    
# Line 740  Line 809 
809  #  Tips shared between 2 trees.  #  Tips shared between 2 trees.
810  #  #
811  #  @tips = newick_shared_tips( $tree1, $tree2 )  #  @tips = newick_shared_tips( $tree1, $tree2 )
812    # \@tips = newick_shared_tips( $tree1, $tree2 )
813  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
814  sub newick_shared_tips {  sub newick_shared_tips {
815      my ( $Tree1, $Tree2 ) = @_;      my ( $tree1, $tree2 ) = @_;
816      my ( @Tips1 ) = newick_tip_list( $Tree1 );      my $tips1 = newick_tip_list( $tree1 );
817      my ( @Tips2 ) = newick_tip_list( $Tree2 );      my $tips2 = newick_tip_list( $tree2 );
818      gjolists::intersection( \@Tips1, \@Tips2 );      my @tips = &intersection( $tips1, $tips2 );
819        wantarray ? @tips : \@tips;
820  }  }
821    
822    
# Line 767  Line 838 
838    
839    
840  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
841    #  Hash of tip nodes and corresponding distances from root:
842    #
843    #   %tip_distances = newick_tip_distances( $node )
844    #  \%tip_distances = newick_tip_distances( $node )
845    #-------------------------------------------------------------------------------
846    sub newick_tip_distances
847    {
848        my ( $node, $x, $hash ) = @_;
849        my $root = ! $hash;
850        ref( $hash ) eq 'HASH' or $hash = {};
851    
852        $x ||= 0;
853        $x  += newick_x( $node ) || 0;
854    
855        #  Is it a tip?
856    
857        my $n_desc = newick_n_desc( $node );
858        if ( ! $n_desc )
859        {
860            $hash->{ newick_lbl( $node ) } = $x;
861            return $hash;
862        }
863    
864        #  Tree rooted on tip?
865    
866        if ( ( $n_desc == 1 ) && $root && ( newick_lbl( $node ) ) )
867        {
868            $hash->{ newick_lbl( $node ) } = 0;  # Distance to root is zero
869        }
870    
871        foreach ( newick_desc_list( $node ) )
872        {
873            newick_tip_distances( $_, $x, $hash );
874        }
875    
876        wantarray ? %$hash : $hash;
877    }
878    
879    
880    #-------------------------------------------------------------------------------
881  #  Tree max X.  #  Tree max X.
882  #  #
883  #  $xmax = newick_max_X( $node )  #  $xmax = newick_max_X( $node )
# Line 910  Line 1021 
1021    
1022      else      else
1023      {      {
1024          my ( $n1, $x1 ) = describe_desc( $dl->[0] );          my ( $n1, $x1 ) = describe_descendant( $dl->[0] );
1025          my ( $n2, $x2 ) = describe_desc( $dl->[1] );          my ( $n2, $x2 ) = describe_descendant( $dl->[1] );
1026    
1027          if ( @$n1 == 2 ) { push @$n1, $n2->[0] }          if ( @$n1 == 2 ) { push @$n1, $n2->[0] }
1028          if ( @$n2 == 2 )          if ( @$n2 == 2 )
# Line 926  Line 1037 
1037  }  }
1038    
1039    
1040  sub describe_desc  sub describe_descendant
1041  {  {
1042      my $node = shift;      my $node = shift;
1043    
# Line 951  Line 1062 
1062      #  Return the two lowest of those (the third will come from the      #  Return the two lowest of those (the third will come from the
1063      #  other side of the original node).      #  other side of the original node).
1064    
     else  
     {  
1065          my @rep_tips = sort { lc $a cmp lc $b }          my @rep_tips = sort { lc $a cmp lc $b }
1066                         map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }                         map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }
1067                         @$dl;                         @$dl;
1068          return ( [ @rep_tips[0,1] ], $x );          return ( [ @rep_tips[0,1] ], $x );
1069      }      }
 }  
1070    
1071    
1072  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 967  Line 1075 
1075  #     Three sorted tip labels intersecting at node, each being smallest  #     Three sorted tip labels intersecting at node, each being smallest
1076  #           of all the tips of their subtrees  #           of all the tips of their subtrees
1077  #  #
1078  #  @TipOrTips = std_node_name( $Tree, $Node )  #  @TipOrTips = std_node_name( $tree, $node )
1079  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1080  sub std_node_name {  sub std_node_name {
1081      my $tree = $_[0];      my $tree = $_[0];
# Line 985  Line 1093 
1093      #  @rest, and keeping the best tip for each subtree.      #  @rest, and keeping the best tip for each subtree.
1094    
1095      my @rest = newick_tip_list( $tree );      my @rest = newick_tip_list( $tree );
1096      my @best = map {      my @best = map
1097              {
1098              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );
1099              @rest = gjolists::set_difference( \@rest, \@tips );              @rest = &set_difference( \@rest, \@tips );
1100              $tips[0];              $tips[0];
1101          } newick_desc_list( $noderef );          } newick_desc_list( $noderef );
1102    
# Line 1075  Line 1184 
1184      my $imax = newick_n_desc( $node );      my $imax = newick_n_desc( $node );
1185      for ( my $i = 1; $i <= $imax; $i++ ) {      for ( my $i = 1; $i <= $imax; $i++ ) {
1186         @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );         @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );
1187         if ( @path ) { return @path }          return @path if @path;
1188      }      }
1189    
1190      ();  #  Not found      ();  #  Not found
# Line 1106  Line 1215 
1215      @p2 && @p3 || return ();                             #  Were they found?      @p2 && @p3 || return ();                             #  Were they found?
1216    
1217      # Find the common prefix for each pair of paths      # Find the common prefix for each pair of paths
1218      my @p12 = gjolists::common_prefix( \@p1, \@p2 );      my @p12 = &common_prefix( \@p1, \@p2 );
1219      my @p13 = gjolists::common_prefix( \@p1, \@p3 );      my @p13 = &common_prefix( \@p1, \@p3 );
1220      my @p23 = gjolists::common_prefix( \@p2, \@p3 );      my @p23 = &common_prefix( \@p2, \@p3 );
1221    
1222      # Return the longest common prefix of any two paths      # Return the longest common prefix of any two paths
1223      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :
# Line 1159  Line 1268 
1268      @p1 && @p2 || return undef;                          # Were they found?      @p1 && @p2 || return undef;                          # Were they found?
1269    
1270      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1271      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1272      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1273      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1274    
# Line 1184  Line 1293 
1293      my @p2 = path_to_node( $node, $node2 ) or return undef;      my @p2 = path_to_node( $node, $node2 ) or return undef;
1294    
1295      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1296      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1297      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1298      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1299    
# Line 1435  Line 1544 
1544    
1545    
1546  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1547    #  Modify all branch lengths by a function.
1548    #
1549    #     $node = newick_modify_branches( $node, \&function )
1550    #     $node = newick_modify_branches( $node, \&function, \@func_parms )
1551    #
1552    #  Function must have form
1553    #
1554    #     $x2 = &$function( $x1 )
1555    #     $x2 = &$function( $x1, @$func_parms )
1556    #
1557    #-------------------------------------------------------------------------------
1558    sub newick_modify_branches {
1559        my ( $node, $func, $parm ) = @_;
1560    
1561        set_newick_x( $node, &$func( newick_x( $node ), ( $parm ? @$parm : () ) ) );
1562        foreach ( newick_desc_list( $node ) )
1563        {
1564            newick_modify_branches( $_, $func, $parm )
1565        }
1566    
1567        $node;
1568    }
1569    
1570    
1571    #-------------------------------------------------------------------------------
1572  #  Set negative branches to zero.  The original tree is modfied.  #  Set negative branches to zero.  The original tree is modfied.
1573  #  #
1574  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
# Line 1523  Line 1657 
1657    
1658    
1659  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1660    #  Standard name for a Newick tree topology
1661    #
1662    #    $stdname = std_tree_name( $tree )
1663    #
1664    #-------------------------------------------------------------------------------
1665    sub std_tree_name
1666    {
1667        my ( $tree ) = @_;
1668        my ( $mintip ) = sort { lc $a cmp lc $b } newick_tip_list( $tree );
1669        ( std_tree_name_2( reroot_newick_next_to_tip( copy_newick_tree( $tree ), $mintip ) ) )[0];
1670    }
1671    
1672    
1673    #
1674    #  ( $name, $mintip ) = std_tree_name_2( $node )
1675    #
1676    sub std_tree_name_2
1677    {
1678        my ( $node ) = @_;
1679    
1680        my @descends = newick_desc_list( $node );
1681        if ( @descends == 0 )
1682        {
1683            my $lbl = newick_lbl( $node );
1684            return ( $lbl, $lbl );
1685        }
1686    
1687        my @list = sort { lc $a->[1] cmp lc $b->[1] || $a->[1] cmp $b->[1] }
1688                   map  { [ std_tree_name_2( $_ ) ] }
1689                   @descends;
1690        my $mintip = $list[0]->[1];
1691        my $name   = '(' . join( "\t", map { $_->[0] } @list ) . ')';
1692    
1693        return ( $name, $mintip );
1694    }
1695    
1696    
1697    #-------------------------------------------------------------------------------
1698  #  Move largest groups to periphery of tree (in place).  #  Move largest groups to periphery of tree (in place).
1699  #  #
1700  #      dir  <= -2 for up-sweeping tree (big groups always first),  #      dir  <= -2 for up-sweeping tree (big groups always first),
# Line 1582  Line 1754 
1754      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
1755      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip
1756    
     #  Reorder this subtree:  
   
1757      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1758      if    ( $dir < 0 ) {                   #  Big group first  
1759          @$dl_ref = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;      #  Reorder this subtree (biggest subtrees to outside)
1760    
1761        if ( $dir )
1762        {
1763            #  Big group first
1764            my @dl = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;
1765    
1766            my ( @dl1, @dl2 );
1767            for ( my $i = 0; $i < $nd; $i++ ) {
1768                if ( $i & 1 ) { push @dl2, $dl[$i] } else { push @dl1, $dl[$i] }
1769      }      }
1770      elsif ( $dir > 0 ) {                   #  Small group first  
1771          @$dl_ref = sort { $cntref->{$a} <=> $cntref->{$b} } @$dl_ref;          @$dl_ref = ( $dir < 0 ) ? ( @dl1, reverse @dl2 )
1772                                    : ( @dl2, reverse @dl1 );
1773      }      }
1774    
1775      #  Reorder within descendant subtrees:      #  Reorder within descendant subtrees:
# Line 1621  Line 1801 
1801  #  #
1802  #  $tree = unaesthetic_newick_tree( $treeref, $dir )  #  $tree = unaesthetic_newick_tree( $treeref, $dir )
1803  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1804  sub unaesthetic_newick_tree {  sub unaesthetic_newick_tree
1805    {
1806      my ( $tree, $dir ) = @_;      my ( $tree, $dir ) = @_;
1807      my %cnt;      my %cnt;
1808    
# Line 1641  Line 1822 
1822  #           = 0 for no change, and  #           = 0 for no change, and
1823  #           > 0 for downward branch (small group first).  #           > 0 for downward branch (small group first).
1824  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1825  sub reorder_against_tip_count {  sub reorder_against_tip_count
1826    {
1827      my ( $node, $cntref, $dir ) = @_;      my ( $node, $cntref, $dir ) = @_;
1828    
1829      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1680  Line 1862 
1862  #  #
1863  #  $tree = random_order_newick_tree( $tree )  #  $tree = random_order_newick_tree( $tree )
1864  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1865  sub random_order_newick_tree {  sub random_order_newick_tree
1866    {
1867      my ( $node ) = @_;      my ( $node ) = @_;
1868    
1869      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1689  Line 1872 
1872      #  Reorder this subtree:      #  Reorder this subtree:
1873    
1874      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1875      @$dl_ref = gjolists::random_order( @$dl_ref );      @$dl_ref = &random_order( @$dl_ref );
1876    
1877      #  Reorder descendants:      #  Reorder descendants:
1878    
# Line 1704  Line 1887 
1887  #  #
1888  #  $newtree = reroot_newick_by_path( @path )  #  $newtree = reroot_newick_by_path( @path )
1889  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1890  sub reroot_newick_by_path {  sub reroot_newick_by_path
1891    {
1892      my ( $node1, $path1, @rest ) = @_;      my ( $node1, $path1, @rest ) = @_;
1893      array_ref( $node1 ) || return undef;      #  Always expect a node      array_ref( $node1 ) || return undef;      #  Always expect a node
1894    
# Line 1812  Line 1996 
1996    
1997    
1998  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1999    #  Reroot a newick tree along the path between 2 nodes:
2000    #
2001    #  $tree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
2002    #-------------------------------------------------------------------------------
2003    sub reroot_newick_between_nodes
2004    {
2005        my ( $tree, $node1, $node2, $fraction ) = @_;
2006        array_ref( $tree ) or return undef;
2007        $fraction >= 0 && $fraction <= 1 or return undef;
2008    
2009        #  Find the paths to the nodes:
2010    
2011        my @path1 = path_to_node( $tree, $node1 ) or return undef;
2012        my @path2 = path_to_node( $tree, $node2 ) or return undef;
2013    
2014        reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction )
2015    }
2016    
2017    
2018    #-------------------------------------------------------------------------------
2019    #  Reroot a newick tree along the path between 2 nodes:
2020    #
2021    #  $tree = reroot_newick_between_node_refs( $tree, $node1, $node2, $fraction )
2022    #-------------------------------------------------------------------------------
2023    sub reroot_newick_between_node_refs
2024    {
2025        my ( $tree, $node1, $node2, $fraction ) = @_;
2026        array_ref( $tree ) or return undef;
2027    
2028        #  Find the paths to the nodes:
2029    
2030        my @path1 = path_to_node_ref( $tree, $node1 ) or return undef;
2031        my @path2 = path_to_node_ref( $tree, $node2 ) or return undef;;
2032    
2033        reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction )
2034    }
2035    
2036    
2037    #-------------------------------------------------------------------------------
2038    #  Reroot a newick tree along the path between 2 nodes defined by paths:
2039    #
2040    #  $tree = reroot_newick_between_nodes_by_path( $tree, $path1, $path2, $fraction )
2041    #-------------------------------------------------------------------------------
2042    sub reroot_newick_between_nodes_by_path
2043    {
2044        my ( $tree, $path1, $path2, $fraction ) = @_;
2045        array_ref( $tree ) and array_ref( $path1 ) and  array_ref( $path2 )
2046           or return undef;
2047        $fraction >= 0 && $fraction <= 1 or return undef;
2048    
2049        my @path1 = @$path1;
2050        my @path2 = @$path2;
2051    
2052        #  Trim the common prefix, saving it:
2053    
2054        my @prefix = ();
2055        while ( defined( $path1[1] ) && defined( $path2[1] ) && ( $path1[1] == $path2[1] ) )
2056        {
2057            push @prefix, splice( @path1, 0, 2 );
2058            splice( @path2, 0, 2 );
2059        }
2060    
2061        my ( @path, $dist );
2062        if    ( @path1 < 3 )
2063        {
2064            @path2 >= 3 or return undef;              # node1 = node2
2065            $dist = $fraction * newick_path_length( @path2 );
2066            @path = @path2;
2067        }
2068        elsif ( @path2 < 3 )
2069        {
2070            $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
2071            @path = @path1;
2072        }
2073        else
2074        {
2075            my $dist1 = newick_path_length( @path1 );
2076            my $dist2 = newick_path_length( @path2 );
2077            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2078            @path = ( $dist <= 0 ) ? @path1 : @path2;
2079            $dist = abs( $dist );
2080        }
2081    
2082        #  Descend tree until we reach the insertion branch:
2083    
2084        my $x;
2085        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
2086        {
2087            $dist -= $x;
2088            push @prefix, splice( @path, 0, 2 );
2089        }
2090    
2091        #  Insert the new node:
2092    
2093        my $newnode = [ [ $path[2] ], undef, $dist ];
2094        set_newick_desc_i( $path[0], $path[1], $newnode );
2095        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
2096    
2097        #  We can now build the path from root to the new node
2098    
2099        reroot_newick_by_path( @prefix, @path[0,1], $newnode );
2100    }
2101    
2102    
2103    #-------------------------------------------------------------------------------
2104  #  Move root of tree to an approximate midpoint.  #  Move root of tree to an approximate midpoint.
2105  #  #
2106  #  $newtree = reroot_newick_to_approx_midpoint( $tree )  #  $newtree = reroot_newick_to_approx_midpoint( $tree )
# Line 1823  Line 2112 
2112    
2113      my $dists1 = average_to_tips_1( $tree );      my $dists1 = average_to_tips_1( $tree );
2114    
2115      #  Compile average tip to node distances descending, returning midpoint node      #  Compile average tip to node distances descending, returning midpoint
2116        #  cadidates as a list of [ $node1, $node2, $fraction ]
2117    
2118        my @mids = average_to_tips_2( $dists1, undef, undef );
2119    
2120        #  Reroot to first midpoint candidate
2121    
2122        return $tree if ! @mids;
2123        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2124        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2125    }
2126    
2127    
2128    #-------------------------------------------------------------------------------
2129    #  Move root of tree to a midpoint.
2130    #
2131    #  $newtree = reroot_newick_to_midpoint( $tree )
2132    #-------------------------------------------------------------------------------
2133    sub reroot_newick_to_midpoint {
2134        my ( $tree ) = @_;
2135    
2136        #  Compile average tip to node distances assending
2137    
2138        my $dists1 = average_to_tips_1( $tree );
2139    
2140      my $node = average_to_tips_2( $dists1, undef, undef );      #  Compile average tip to node distances descending, returning midpoint
2141        #  [ $node1, $node2, $fraction ]
2142    
2143      #  Reroot      my @mids = average_to_tips_2( $dists1, undef, undef );
2144    
2145      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree      @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2146  }  }
2147    
2148    
2149    #-------------------------------------------------------------------------------
2150    #  Compile average tip to node distances assending
2151    #-------------------------------------------------------------------------------
2152  sub average_to_tips_1 {  sub average_to_tips_1 {
2153      my ( $node ) = @_;      my ( $node ) = @_;
2154    
# Line 1843  Line 2159 
2159          foreach ( @desc_dists ) { $x_below += $_->[0] }          foreach ( @desc_dists ) { $x_below += $_->[0] }
2160          $x_below /= @desc_dists;          $x_below /= @desc_dists;
2161      }      }
2162    
2163      my $x = newick_x( $node ) || 0;      my $x = newick_x( $node ) || 0;
2164      my $x_net = $x_below + $x;      my $x_net = $x_below + $x;
2165    
# Line 1850  Line 2167 
2167  }  }
2168    
2169    
2170    #-------------------------------------------------------------------------------
2171    #  Compile average tip to node distances descending, returning midpoint as
2172    #  [ $node1, $node2, $fraction_of_dist_between ]
2173    #-------------------------------------------------------------------------------
2174  sub average_to_tips_2 {  sub average_to_tips_2 {
2175      my ( $dists1, $x_above, $anc_node ) = @_;      my ( $dists1, $x_above, $anc_node ) = @_;
2176      my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;      my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;
2177    
2178      #  Are we done?  Root is in this node's branch, or "above"?      #  Are we done?  Root is in this node's branch, or "above"?
2179    
2180      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      my @mids = ();
     # print STDERR "x       = $x\n";  
     # print STDERR "x_below = $x_below\n";  
     # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";  
   
2181      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2182      {      {
2183          #  At this point the root can only be in this node's branch,          #  At this point the root can only be in this node's branch,
# Line 1872  Line 2189 
2189    
2190          if ( ( $x_below + $x ) >= $x_above )          if ( ( $x_below + $x ) >= $x_above )
2191          {          {
2192              return ( $x_above >= $x_below ) ? $anc_node : $node;              #  We will need to make a new node for the root, $fract of
2193          }              #  the way from $node to $anc_node:
2194          else              my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2195          {                                     : 0.5;
2196              return undef;              push @mids, [ $node, $anc_node, $fract ];
2197          }          }
2198      }      }
2199    
2200      #  The root must be somewhere below this node:      #  The root might be somewhere below this node:
2201    
2202      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );
2203      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );
# Line 1890  Line 2207 
2207          #  If input tree is tip_rooted, $n-1 can be 0, so:          #  If input tree is tip_rooted, $n-1 can be 0, so:
2208    
2209          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;
2210          my $root = average_to_tips_2( $_, $above2, $node );          push @mids, average_to_tips_2( $_, $above2, $node );
         if ( $root ) { return $root }  
2211      }      }
2212    
2213      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2214  }  }
2215    
2216    
# Line 1908  Line 2222 
2222  sub reroot_newick_to_approx_midpoint_w {  sub reroot_newick_to_approx_midpoint_w {
2223      my ( $tree ) = @_;      my ( $tree ) = @_;
2224    
2225        #  Compile average tip to node distances assending from tips
2226    
2227        my $dists1 = average_to_tips_1_w( $tree );
2228    
2229        #  Compile average tip to node distances descending, returning midpoints
2230    
2231        my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2232    
2233        #  Reroot to first midpoint candidate
2234    
2235        return $tree if ! @mids;
2236        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2237        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2238    }
2239    
2240    
2241    #-------------------------------------------------------------------------------
2242    #  Move root of tree to an approximate midpoint.  Weight by tips.
2243    #
2244    #  $newtree = reroot_newick_to_midpoint_w( $tree )
2245    #-------------------------------------------------------------------------------
2246    sub reroot_newick_to_midpoint_w {
2247        my ( $tree ) = @_;
2248    
2249      #  Compile average tip to node distances assending      #  Compile average tip to node distances assending
2250    
2251      my $dists1 = average_to_tips_1_w( $tree );      my $dists1 = average_to_tips_1_w( $tree );
2252    
2253      #  Compile average tip to node distances descending, returning midpoint node      #  Compile average tip to node distances descending, returning midpoint node
2254    
2255      my $node = average_to_tips_2_w( $dists1, undef, undef, undef );      my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2256    
2257      #  Reroot      #  Reroot at first candidate midpoint
2258    
2259      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree      @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2260  }  }
2261    
2262    
# Line 1939  Line 2277 
2277          }          }
2278          $x_below /= $n_below;          $x_below /= $n_below;
2279      }      }
2280    
2281      my $x = newick_x( $node ) || 0;      my $x = newick_x( $node ) || 0;
2282      my $x_net = $x_below + $x;      my $x_net = $x_below + $x;
2283    
# Line 1952  Line 2291 
2291    
2292      #  Are we done?  Root is in this node's branch, or "above"?      #  Are we done?  Root is in this node's branch, or "above"?
2293    
2294      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      my @mids = ();
     # print STDERR "x       = $x\n";  
     # print STDERR "x_below = $x_below\n";  
     # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";  
   
2295      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2296      {      {
2297          #  At this point the root can only be in this node's branch,          #  At this point the root can only be in this node's branch,
# Line 1964  Line 2299 
2299          #  would mean that the midpoint is actually down a different          #  would mean that the midpoint is actually down a different
2300          #  path from the root of the current tree).          #  path from the root of the current tree).
2301          #          #
2302          #  Is the root in the current branch?          #  Is their a root in the current branch?
2303    
2304          if ( ( $x_below + $x ) >= $x_above )          if ( ( $x_below + $x ) >= $x_above )
2305          {          {
2306              return ( $x_above >= $x_below ) ? $anc_node : $node;              #  We will need to make a new node for the root, $fract of
2307          }              #  the way from $node to $anc_node:
2308          else              my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2309          {                                     : 0.5;
2310              return undef;              push @mids, [ $node, $anc_node, $fract ];
2311          }          }
2312      }      }
2313    
# Line 1992  Line 2327 
2327    
2328          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )
2329                                   : 0;                                   : 0;
2330          my $root = average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );          push @mids, average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );
         if ( $root ) { return $root }  
2331      }      }
2332    
2333      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2334  }  }
2335    
2336    
# Line 2313  Line 2645 
2645    
2646    
2647  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2648  #  Produce a subtree with the desired tips:  #  Produce a potentially rooted subtree with the desired tips:
2649  #  #
2650  #     Except for (some) tip nodes, the tree produced is a copy.  #     Except for (some) tip nodes, the tree produced is a copy.
2651  #     There is no check that requested tips exist.  #     There is no check that requested tips exist.
2652  #  #
2653  #  $newtree = newick_subtree( $tree,  @tips )  #  $newtree = rooted_newick_subtree( $tree,  @tips )
2654  #  $newtree = newick_subtree( $tree, \@tips )  #  $newtree = rooted_newick_subtree( $tree, \@tips )
2655  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2656  sub newick_subtree {  sub rooted_newick_subtree {
2657      my ( $tr, @tips ) = @_;      my ( $tr, @tips ) = @_;
2658      if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }      if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }
2659    
2660      if ( @tips < 2 ) { return undef }      if ( @tips < 2 ) { return undef }
     my $was_rooted = newick_is_rooted( $tr );  
2661      my $keephash = { map { ( $_, 1 ) } @tips };      my $keephash = { map { ( $_, 1 ) } @tips };
2662      my $tr2 = subtree1( $tr, $keephash );      my $tr2 = subtree1( $tr, $keephash );
     $tr2 = uproot_newick( $tr2 ) if ! $was_rooted && newick_is_rooted( $tr2 );  
2663      $tr2->[2] = undef if $tr2;                   # undef root branch length      $tr2->[2] = undef if $tr2;                   # undef root branch length
2664      $tr2;      $tr2;
2665  }  }
2666    
2667    
2668  sub subtree1 {  #-------------------------------------------------------------------------------
2669      my ( $tr, $keep ) = @_;  #  Produce a subtree with the desired tips:
2670      my @desc1 = newick_desc_list( $tr );  #
2671    #     Except for (some) tip nodes, the tree produced is a copy.
2672    #     There is no check that requested tips exist.
2673    #
2674    #  $newtree = newick_subtree( $tree,  @tips )
2675    #  $newtree = newick_subtree( $tree, \@tips )
2676    #-------------------------------------------------------------------------------
2677    sub newick_subtree {
2678        my ( $tr, @tips ) = @_;
2679        if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }
2680    
2681        if ( @tips < 2 ) { return undef }
2682        my $was_rooted = newick_is_rooted( $tr );
2683        my $keephash = { map { ( $_, 1 ) } @tips };
2684        my $tr2 = subtree1( $tr, $keephash );
2685        $tr2 = uproot_newick( $tr2 ) if ! $was_rooted && newick_is_rooted( $tr2 );
2686        $tr2->[2] = undef if $tr2;                   # undef root branch length
2687        $tr2;
2688    }
2689    
2690    
2691    sub subtree1 {
2692        my ( $tr, $keep ) = @_;
2693        my @desc1 = newick_desc_list( $tr );
2694    
2695      #  Is this a tip, and is it in the keep list?      #  Is this a tip, and is it in the keep list?
2696    
2697      if ( @desc1 < 1 ) {      if ( @desc1 < 1 ) {
# Line 2386  Line 2739 
2739  }  }
2740    
2741    
2742    #-------------------------------------------------------------------------------
2743    #  The smallest subtree of rooted tree that includes @tips:
2744    #
2745    #    $node = newick_covering_subtree( $tree,  @tips )
2746    #    $node = newick_covering_subtree( $tree, \@tips )
2747    #-------------------------------------------------------------------------------
2748    
2749    sub newick_covering_subtree {
2750        my $tree = shift;
2751        my %tips = map { $_ => 1 } ( ( ref( $_[0] ) eq 'ARRAY' ) ? @{ $_[0] } : @_ );
2752    
2753        #  Return smallest covering node, if any:
2754    
2755        ( newick_covering_subtree( $tree, \%tips ) )[ 0 ];
2756    }
2757    
2758    
2759    sub newick_covering_subtree_1 {
2760        my ( $node, $tips ) = @_;
2761        my $n_cover = 0;
2762        my @desc = newick_desc_list( $node );
2763        if ( @desc )
2764        {
2765            foreach ( @desc )
2766            {
2767                my ( $subtree, $n ) = newick_covering_subtree_1( $_, $tips );
2768                return ( $subtree, $n ) if $subtree;
2769                $n_cover += $n;
2770            }
2771        }
2772        elsif ( $tips->{ newick_lbl( $node ) } )
2773        {
2774            $n_cover++;
2775        }
2776    
2777        #  If all tips are covered, return node
2778    
2779        ( $n_cover == keys %$tips ) ? ( $node, $n_cover ) : ( undef, $n_cover );
2780    }
2781    
2782    
2783    #===============================================================================
2784    #
2785    #  Representative subtrees
2786    #
2787    #===============================================================================
2788    #  Find subtree of size n representating vicinity of the root:
2789    #
2790    #   $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
2791    #   $subtree = root_neighborhood_representative_tree( $tree, $n )
2792    #
2793    #  Note that if $tree is rooted, then the subtree will also be.  This can have
2794    #  consequences on downstream programs.
2795    #-------------------------------------------------------------------------------
2796    sub root_neighborhood_representative_tree
2797    {
2798        my ( $tree, $n, $tip_priority ) = @_;
2799        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2800        if ( newick_tip_count( $tree ) <= $n ) { return $tree }
2801    
2802        $tip_priority ||= default_tip_priority( $tree );
2803        my @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2804                   root_proximal_newick_subtrees( $tree, $n );
2805    
2806        newick_subtree( copy_newick_tree( $tree ), \@tips );
2807    }
2808    
2809    
2810    #-------------------------------------------------------------------------------
2811    #  Find n tips to represent tree lineages in vicinity of another tip.
2812    #  Default tip priority is short total branch length.
2813    #
2814    #  \@tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2815    #   @tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2816    #  \@tips = root_neighborhood_representative_tips( $tree, $n )
2817    #   @tips = root_neighborhood_representative_tips( $tree, $n )
2818    #-------------------------------------------------------------------------------
2819    sub root_neighborhood_representative_tips
2820    {
2821        my ( $tree, $n, $tip_priority ) = @_;
2822        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2823    
2824        my @tips;
2825        if ( newick_tip_count( $tree ) <= $n )
2826        {
2827            @tips = newick_tip_list( $tree );
2828        }
2829        else
2830        {
2831            $tip_priority ||= default_tip_priority( $tree );
2832            @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2833                    root_proximal_newick_subtrees( $tree, $n );
2834        }
2835    
2836        wantarray ? @tips : \@tips;
2837    }
2838    
2839    
2840    #-------------------------------------------------------------------------------
2841    #  Find subtree of size n representating vicinity of a tip:
2842    #
2843    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
2844    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
2845    #-------------------------------------------------------------------------------
2846    sub tip_neighborhood_representative_tree
2847    {
2848        my ( $tree, $tip, $n, $tip_priority ) = @_;
2849        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2850        newick_tip_in_tree( $tree, $tip ) or return undef;
2851    
2852        my $tree1 = copy_newick_tree( $tree );
2853        if ( newick_tip_count( $tree1 ) - 1 <= $n )
2854        {
2855            return prune_from_newick( $tree1, $tip )
2856        }
2857    
2858        $tree1 = reroot_newick_to_tip( $tree1, $tip );
2859        $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2860        my @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2861        newick_subtree( copy_newick_tree( $tree ), \@tips );
2862    }
2863    
2864    
2865    #-------------------------------------------------------------------------------
2866    #  Find n tips to represent tree lineages in vicinity of another tip.
2867    #  Default tip priority is short total branch length.
2868    #
2869    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2870    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2871    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2872    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2873    #-------------------------------------------------------------------------------
2874    sub tip_neighborhood_representative_tips
2875    {
2876        my ( $tree, $tip, $n, $tip_priority ) = @_;
2877        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2878        newick_tip_in_tree( $tree, $tip ) or return undef;
2879    
2880        my @tips = newick_tip_list( $tree );
2881        if ( newick_tip_count( $tree ) - 1 <= $n )
2882        {
2883            @tips = grep { $_ ne $tip } @tips;
2884        }
2885        else
2886        {
2887            my $tree1 = copy_newick_tree( $tree );
2888            $tree1 = reroot_newick_to_tip( $tree1, $tip );
2889            $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2890            @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2891        }
2892    
2893        wantarray ? @tips : \@tips;
2894    }
2895    
2896    
2897    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2898    #  Anonymous hash of the negative distance from root to each tip:
2899    #
2900    #   \%tip_priority = default_tip_priority( $tree )
2901    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2902    sub default_tip_priority
2903    {
2904        my ( $tree ) = @_;
2905        my $tip_distances = newick_tip_distances( $tree ) || {};
2906        return { map { $_ => -$tip_distances->{$_} } keys %$tip_distances };
2907    }
2908    
2909    
2910    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2911    #  Select a tip from a subtree base on a priority value:
2912    #
2913    #    $tip = representative_tip_of_newick_node( $node, \%tip_priority )
2914    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2915    sub representative_tip_of_newick_node
2916    {
2917        my ( $node, $tip_priority ) = @_;
2918        my ( $tip ) = sort { $b->[1] <=> $a->[1] }   # The best
2919                      map  { [ $_, $tip_priority->{ $_ } ] }
2920                      newick_tip_list( $node );
2921        $tip->[0];                                   # Label from label-priority pair
2922    }
2923    
2924    
2925    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2926    #  Find n subtrees focused around the root of a tree.  Typically each will
2927    #  then be reduced to a single tip to make a representative tree:
2928    #
2929    #   @subtrees = root_proximal_newick_subtrees( $tree, $n )
2930    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2931    sub root_proximal_newick_subtrees
2932    {
2933        my ( $tree, $n ) = @_;
2934        my $node_start_end = newick_branch_intervals( $tree );
2935        n_representative_branches( $n, $node_start_end );
2936    }
2937    
2938    
2939    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2940    #   @node_start_end = newick_branch_intervals( $tree )
2941    #  \@node_start_end = newick_branch_intervals( $tree )
2942    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2943    sub newick_branch_intervals
2944    {
2945        my ( $node, $parent_x ) = @_;
2946        $parent_x ||= 0;
2947        my ( $desc, undef, $dx ) = @$node;
2948        my $x = $parent_x + $dx;
2949        my $interval = [ $node, $parent_x, $desc && @$desc ? $x : 1e100 ];
2950        my @intervals = ( $interval,
2951                          map { &newick_branch_intervals( $_, $x ) } @$desc
2952                        );
2953        return wantarray ? @intervals : \@intervals;
2954    }
2955    
2956    
2957    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2958    #   @ids = n_representative_branches( $n,  @id_start_end )
2959    #   @ids = n_representative_branches( $n, \@id_start_end )
2960    #  \@ids = n_representative_branches( $n,  @id_start_end )
2961    #  \@ids = n_representative_branches( $n, \@id_start_end )
2962    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2963    sub n_representative_branches
2964    {
2965        my $n = shift;
2966        #  Sort intervals by start point:
2967        my @unprocessed = sort { $a->[1] <=> $b->[1] }
2968                          ( @_ == 1 ) ? @{ $_[0] } : @_;
2969        my @active = ();
2970        my ( $interval, $current_point );
2971        foreach $interval ( @unprocessed )
2972        {
2973            $current_point = $interval->[1];
2974            #  Filter out intervals that have ended.  This is N**2 in the number
2975            #  of representatives.  Fixing this would require maintaining a sorted
2976            #  active list.
2977            @active = grep { $_->[2] > $current_point } @active;
2978            push @active, $interval;
2979            last if ( @active >= $n );
2980        }
2981    
2982        my @ids = map { $_->[0] } @active;
2983        return wantarray() ? @ids : \@ids;
2984    }
2985    
2986    
2987  #===============================================================================  #===============================================================================
2988  #  #
2989  #  Tree writing and reading  #  Tree writing and reading
# Line 2553  Line 3151 
3151      my ( $fh, $close ) = open_input( $file );      my ( $fh, $close ) = open_input( $file );
3152      my $tree;      my $tree;
3153      my @lines = ();      my @lines = ();
3154      while ( defined( $_ = <$fh> ) )      foreach ( <$fh> )
3155      {      {
3156          chomp;          chomp;
3157          push @lines, $_;          push @lines, $_;
# Line 2575  Line 3173 
3173      my ( $fh, $close ) = open_input( $file );      my ( $fh, $close ) = open_input( $file );
3174      my @trees = ();      my @trees = ();
3175      my @lines = ();      my @lines = ();
3176      while ( defined( $_ = <$fh> ) )      foreach ( <$fh> )
3177      {      {
3178          chomp;          chomp;
3179          push @lines, $_;          push @lines, $_;
# Line 2762  Line 3360 
3360      #  Loop while it is a comment:      #  Loop while it is a comment:
3361      while ( substr( $s, $ind, 1 ) eq "[" ) {      while ( substr( $s, $ind, 1 ) eq "[" ) {
3362          $ind++;          $ind++;
3363            my $depth = 1;
3364            my $ind2  = $ind;
3365    
3366          #  Find end          #  Find end
3367          if ( substr( $s, $ind ) !~ /^([^]]*)\]/ ) {          while ( $depth > 0 )
3368            {
3369                if ( substr( $s, $ind2 ) =~ /^([^][]*\[)/ )     # nested [ ... ]
3370                {
3371                    $ind2 += length( $1 );  #  Points at char just past [
3372                    $depth++;               #  If nested comments are allowed
3373                }
3374                elsif ( substr( $s, $ind2 ) =~ /^([^][]*\])/ )  # close bracket
3375                {
3376                    $ind2 += length( $1 );  #  Points at char just past ]
3377                    $depth--;
3378                }
3379                else
3380                {
3381              treeParseError( "comment missing closing bracket '["              treeParseError( "comment missing closing bracket '["
3382                             . substr( $s, $ind ) . "'" )                             . substr( $s, $ind ) . "'" )
3383          }          }
3384          my $comment = $1;          }
3385    
3386          #  Save if it includes any "text"          my $comment = substr( $s, $ind, $ind2-$ind-1 );
3387          if ( $comment =~ m/\S/ ) { push @clist, $comment }          if ( $comment =~ m/\S/ ) { push @clist, $comment }
3388    
3389          $ind += length( $comment ) + 1;     #  Comment plus closing bracket          $ind = $ind2;
3390    
3391          #  Skip white space          #  Skip white space
3392          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }
# Line 2792  Line 3405 
3405  #===============================================================================  #===============================================================================
3406  #  Make a printer plot of a tree:  #  Make a printer plot of a tree:
3407  #  #
3408  #     $node   newick tree root node  #  printer_plot_newick( $node, $file, $width, $min_dx, $dy )
3409  #     $file   undef (= \*STDOUT), \*STDOUT, \*STDERR, or a file name.  #  printer_plot_newick( $node, $file, \%options )
3410  #     $width  the approximate characters for the tree without labels  #
3411  #     $min_dx the minimum horizontal branch length  #     $node   # newick tree root node
3412  #     $dy     the vertical space per taxon  #     $file   # undef = \*STDOUT, \*FH, or a file name.
3413    #     $width  # the approximate characters for the tree without labels (D = 68)
3414    #     $min_dx # the minimum horizontal branch length (D = 2)
3415    #     $dy     # the vertical space per taxon (D = 1, most compressed)
3416    #
3417    #  Options:
3418    #
3419    #    dy     => nat_number    # the vertical space per taxon
3420    #    chars  => key           # line drawing character set:
3421    #                            #     html_unicode
3422    #                            #     text (default)
3423    #    min_dx => whole_number  # the minimum horizontal branch length
3424    #    width  => whole_number  # approximate tree width without labels
3425  #  #
 #  printer_plot_newick( $node, $file (D=\*STDOUT), $width (D=68), $min_dx (D=2), $dy (D=1) )  
3426  #===============================================================================  #===============================================================================
3427  sub printer_plot_newick {  sub printer_plot_newick
3428      my ( $node, $file, $width, $min_dx, $dy ) = @_;  {
3429        my ( $node, $file, @opts ) = @_;
3430    
3431      my ( $fh, $close ) = open_output( $file );      my ( $fh, $close ) = open_output( $file );
3432      $fh or return;      $fh or return;
3433    
3434      print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";      my $html = $opts[0] && ref($opts[0]) eq 'HASH'
3435                            && $opts[0]->{ chars }
3436                            && $opts[0]->{ chars } =~ /html/;
3437        print $fh '<PRE>' if $html;
3438        print $fh join( "\n", text_plot_newick( $node, @opts ) ), "\n";
3439        print $fh "</PRE>\n" if $html;
3440    
3441      if ( $close ) { close $fh }      if ( $close ) { close $fh }
3442  }  }
3443    
3444    
3445  #===============================================================================  #===============================================================================
3446    #  Character sets for printer plot trees:
3447    #-------------------------------------------------------------------------------
3448    
3449    my %char_set =
3450      ( text1     => { space  => ' ',
3451                       horiz  => '-',
3452                       vert   => '|',
3453                       el_d_r => '/',
3454                       el_u_r => '\\',
3455                       el_d_l => '\\',
3456                       el_u_l => '/',
3457                       tee_l  => '+',
3458                       tee_r  => '+',
3459                       tee_u  => '+',
3460                       tee_d  => '+',
3461                       half_l => '-',
3462                       half_r => '-',
3463                       half_u => '|',
3464                       half_d => '|',
3465                       cross  => '+',
3466                     },
3467        text2     => { space  => ' ',
3468                       horiz  => '-',
3469                       vert   => '|',
3470                       el_d_r => '+',
3471                       el_u_r => '+',
3472                       el_d_l => '+',
3473                       el_u_l => '+',
3474                       tee_l  => '+',
3475                       tee_r  => '+',
3476                       tee_u  => '+',
3477                       tee_d  => '+',
3478                       half_l => '-',
3479                       half_r => '-',
3480                       half_u => '|',
3481                       half_d => '|',
3482                       cross  => '+',
3483                     },
3484        html_box  => { space  => '&nbsp;',
3485                       horiz  => '&#9472;',
3486                       vert   => '&#9474;',
3487                       el_d_r => '&#9484;',
3488                       el_u_r => '&#9492;',
3489                       el_d_l => '&#9488;',
3490                       el_u_l => '&#9496;',
3491                       tee_l  => '&#9508;',
3492                       tee_r  => '&#9500;',
3493                       tee_u  => '&#9524;',
3494                       tee_d  => '&#9516;',
3495                       half_l => '&#9588;',
3496                       half_r => '&#9590;',
3497                       half_u => '&#9589;',
3498                       half_d => '&#9591;',
3499                       cross  => '&#9532;',
3500                     },
3501        utf8_box  => { space  => ' ',
3502                       horiz  => chr(226) . chr(148) . chr(128),
3503                       vert   => chr(226) . chr(148) . chr(130),
3504                       el_d_r => chr(226) . chr(148) . chr(140),
3505                       el_u_r => chr(226) . chr(148) . chr(148),
3506                       el_d_l => chr(226) . chr(148) . chr(144),
3507                       el_u_l => chr(226) . chr(148) . chr(152),
3508                       tee_l  => chr(226) . chr(148) . chr(164),
3509                       tee_r  => chr(226) . chr(148) . chr(156),
3510                       tee_u  => chr(226) . chr(148) . chr(180),
3511                       tee_d  => chr(226) . chr(148) . chr(172),
3512                       half_l => chr(226) . chr(149) . chr(180),
3513                       half_r => chr(226) . chr(149) . chr(182),
3514                       half_u => chr(226) . chr(149) . chr(181),
3515                       half_d => chr(226) . chr(149) . chr(183),
3516                       cross  => chr(226) . chr(148) . chr(188),
3517                     },
3518      );
3519    
3520    %{ $char_set{ html1 } } = %{ $char_set{ text1 } };
3521    $char_set{ html1 }->{ space } = '&nbsp;';
3522    
3523    %{ $char_set{ html2 } } = %{ $char_set{ text2 } };
3524    $char_set{ html2 }->{ space } = '&nbsp;';
3525    
3526    #  Define some synonyms
3527    
3528    $char_set{ html } = $char_set{ html_box };
3529    $char_set{ line } = $char_set{ utf8_box };
3530    $char_set{ symb } = $char_set{ utf8_box };
3531    $char_set{ text } = $char_set{ text1 };
3532    $char_set{ utf8 } = $char_set{ utf8_box };
3533    
3534    #  Define tree formats and synonyms
3535    
3536    my %tree_format =
3537        ( text         => 'text',
3538          tree_tab_lbl => 'tree_tab_lbl',
3539          tree_lbl     => 'tree_lbl',
3540          chrlist_lbl  => 'chrlist_lbl',
3541          raw          => 'chrlist_lbl',
3542        );
3543    
3544    #===============================================================================
3545  #  Make a text plot of a tree:  #  Make a text plot of a tree:
3546  #  #
3547  #     $node   newick tree root node  #  @lines = text_plot_newick( $node, $width, $min_dx, $dy )
3548  #     $width  the approximate characters for the tree without labels  #  @lines = text_plot_newick( $node, \%options )
3549  #     $min_dx the minimum horizontal branch length  #
3550  #     $dy     the vertical space per taxon  #     $node   # newick tree root node
3551    #     $width  # the approximate characters for the tree without labels (D = 68)
3552    #     $min_dx # the minimum horizontal branch length (D = 2)
3553    #     $dy     # the vertical space per taxon (D = 1, most compressed)
3554    #
3555    #  Options:
3556    #
3557    #    chars  => keyword       # the output character set for the tree
3558    #    dy     => nat_number    # the vertical space per taxon
3559    #    format => keyword       # output format of each line
3560    #    min_dx => whole_number  # the minimum horizontal branch length
3561    #    width  => whole_number  # approximate tree width without labels
3562    #
3563    #  Character sets:
3564    #
3565    #    html       #  synonym of html1
3566    #    html_box   #  html encoding of unicode box drawing characters
3567    #    html1      #  text1 with nonbreaking spaces
3568    #    html2      #  text2 with nonbreaking spaces
3569    #    line       #  synonym of utf8_box
3570    #    raw        #  pass out the internal representation
3571    #    symb       #  synonym of utf8_box
3572    #    text       #  synonym of text1 (Default)
3573    #    text1      #  ascii characters: - + | / \ and space
3574    #    text2      #  ascii characters: - + | + + and space
3575    #    utf8       #  synonym of utf8_box
3576    #    utf8_box   #  utf8 encoding of unicode box drawing characters
3577    #
3578    #  Formats for row lines:
3579    #
3580    #    text           #    $textstring              # Default
3581    #    tree_tab_lbl   #    $treestr \t $labelstr
3582    #    tree_lbl       # [  $treestr,  $labelstr ]
3583    #    chrlist_lbl    # [ \@treechar, $labelstr ]   # Forced with raw chars
3584    #    raw            #  synonym of chrlist_lbl
3585  #  #
 #  @textlines = text_plot_newick( $node, $width (D=68), $min_dx (D=2), $dy (D=1) )  
3586  #===============================================================================  #===============================================================================
3587  sub text_plot_newick {  sub text_plot_newick
3588      my ( $node, $width, $min_dx, $dy ) = @_;  {
3589        my $node = shift @_;
3590      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";
3591      defined( $min_dx ) and ( $min_dx >=  0 ) or $min_dx =  2;  
3592      defined(     $dy ) and (     $dy >=  1 ) or     $dy =  1;      my ( $opts, $width, $min_dx, $dy, $chars, $fmt );
3593      defined( $width  )                       or  $width = 68;      if ( $_[0] && ref $_[0] eq 'HASH' )
3594        {
3595            $opts = shift;
3596        }
3597        else
3598        {
3599            ( $width, $min_dx, $dy ) = @_;
3600            $opts = {};
3601        }
3602    
3603        $chars = $opts->{ chars } || '';
3604        my $charH;
3605        $charH = $char_set{ $chars } || $char_set{ 'text1' } if ( $chars ne 'raw' );
3606        my $is_box = $charH eq $char_set{ html_box }
3607                  || $charH eq $char_set{ utf8_box }
3608                  || $chars eq 'raw';
3609    
3610        $fmt = ( $chars eq 'raw' ) ? 'chrlist_lbl' : $opts->{ format };
3611        $fmt = $tree_format{ $fmt || '' } || 'text';
3612    
3613        $dy    ||= $opts->{ dy     } ||  1;
3614        $width ||= $opts->{ width  } || 68;
3615        $min_dx  = $opts->{ min_dx } if ( ! defined $min_dx || $min_dx < 0 );
3616        $min_dx  = $is_box ? 1 : 2   if ( ! defined $min_dx || $min_dx < 0 );
3617    
3618        #  Layout the tree:
3619    
3620      $min_dx = int( $min_dx );      $min_dx = int( $min_dx );
3621      $dy     = int( $dy );      $dy     = int( $dy );
# Line 2835  Line 3624 
3624      my $hash = {};      my $hash = {};
3625      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );
3626    
3627      # dump_tree_hash( $node, $hash ); exit;      #  Generate the lines of the tree-one by-one:
   
     #  Generate the lines of the tree one by one:  
3628    
3629      my ( $y1, $y2 ) = @{ $hash->{ $node } };      my ( $y1, $y2 ) = @{ $hash->{ $node } };
3630      map { text_tree_row( $node, $hash, $_, "", "+" ) } ( $y1 .. $y2 );      my @lines;
3631        foreach ( ( $y1 .. $y2 ) )
3632        {
3633            my $line = text_tree_row( $node, $hash, $_, [], 'tee_l', $dy >= 2 );
3634            my $lbl  = '';
3635            if ( @$line )
3636            {
3637                if ( $line->[-1] eq '' ) { pop @$line; $lbl = pop @$line }
3638                #  Translate tree characters
3639                @$line = map { $charH->{ $_ } } @$line if $chars ne 'raw';
3640  }  }
3641    
3642            # Convert to requested output format:
3643    
3644            push @lines, $fmt eq 'text'         ? join( '', @$line, ( $lbl ? " $lbl" : () ) )
3645                       : $fmt eq 'text_tab_lbl' ? join( '', @$line, "\t", $lbl )
3646                       : $fmt eq 'tree_lbl'     ? [ join( '', @$line ), $lbl ]
3647                       : $fmt eq 'chrlist_lbl'  ? [ $line, $lbl ]
3648                       :                          ();
3649        }
3650    
3651        # if ( $cells )
3652        # {
3653        #     my $nmax = 0;
3654        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
3655        #     foreach ( @lines )
3656        #     {
3657        #         @$_ = map { "<TD>$_</TD>" } @$_;
3658        #         my $span = $nmax - @$_ + 1;
3659        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
3660        #     }
3661        # }
3662        # elsif ( $tables )
3663        # {
3664        #     my $nmax = 0;
3665        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
3666        #     foreach ( @lines )
3667        #     {
3668        #         @$_ = map { "<TD>$_</TD>" } @$_;
3669        #         my $span = $nmax - @$_ + 1;
3670        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
3671        #     }
3672        # }
3673    
3674        wantarray ? @lines : \@lines;
3675    }
3676    
3677    
3678  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3679  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy )  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd )
3680  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3681  sub layout_printer_plot {  sub layout_printer_plot
3682      my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy ) = @_;  {
3683        my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd ) = @_;
3684      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";
3685      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";
3686    
3687      my $dx = newick_x( $node );      my $dx = newick_x( $node );
3688      if ( defined( $dx ) ) {      if ( defined( $dx ) ) {
3689          $dx *= $x_scale;          $dx *= $x_scale;
3690          $dx >= $min_dx or $dx = $min_dx;          $dx = $min_dx if $dx < $min_dx;
3691      }      }
3692      else {      else {
3693          $dx = ( $x0 > 0 ) ? $min_dx : 0;          $dx = ( $x0 > 0 ) ? $min_dx : 0;
# Line 2881  Line 3714 
3714          $ymax = $y0;          $ymax = $y0;
3715    
3716          foreach ( @dl ) {          foreach ( @dl ) {
3717              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy );              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy,
3718                                                              ( 2*@ylist < @dl ? 0.5001 : 0.4999 )
3719                                                            );
3720              push @ylist, $yi;              push @ylist, $yi;
3721              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }
3722          }          }
# Line 2891  Line 3726 
3726    
3727          $yn1 = $ylist[ 0];          $yn1 = $ylist[ 0];
3728          $yn2 = $ylist[-1];          $yn2 = $ylist[-1];
3729          $y = int( 0.5 * ( $yn1 + $yn2 ) + 0.4999 );          $y   = int( 0.5 * ( $yn1 + $yn2 ) + ( $yrnd || 0.4999 ) );
3730    
3731            #  Handle special case of internal node label. Put it between subtrees.
3732    
3733            if ( ( $dy >= 2 ) && newick_lbl( $node ) && ( @dl > 1 ) ) {
3734                #  Find the descendents $i1 and $i2 to put the branch between
3735                my $i2 = 1;
3736                while ( ( $i2+1 < @ylist ) && ( $ylist[$i2] < $y ) ) { $i2++ }
3737                my $i1 = $i2 - 1;
3738                #  Get bottom of subtree1 and top of subtree2:
3739                my $ymax1 = $hash->{ $dl[ $i1 ] }->[ 1 ];
3740                my $ymin2 = $hash->{ $dl[ $i2 ] }->[ 0 ];
3741                #  Midway between bottom of subtree1 and top of subtree2, with
3742                #  preferred rounding direction
3743                $y = int( 0.5 * ( $ymax1 + $ymin2 ) + ( $yrnd || 0.4999 ) );
3744            }
3745      }      }
3746    
3747      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );
# Line 2901  Line 3751 
3751  }  }
3752    
3753    
3754  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #  What symbol do we get if we add a leftward line to some other symbol?
 #  Debug routine  
 #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
 sub dump_tree {  
     my ( $node, $prefix ) = @_;  
     defined( $prefix ) or $prefix = "";  
     print STDERR $prefix, join(", ", @$node), "\n";  
     my @dl = $node->[0] ? @{$node->[0]} : ();  
     foreach ( @dl ) { dump_tree( $_, $prefix . "  " ) }  
     $prefix or print STDERR "\n";  
 }  
   
   
 #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
 #  Debug routine  
 #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
 sub dump_tree_hash {  
     my ( $node, $hash, $prefix ) = @_;  
     defined( $prefix ) or print STDERR "node; [ y1, y2, x0, x, y, yn1, yn2 ]\n" and $prefix = "";  
     print STDERR $prefix, join(", ", @$node), "; ", join(", ", @{ $hash->{ $node } } ), "\n";  
     my @dl = $node->[0] ? @{$node->[0]} : ();  
     foreach (@dl) { dump_tree_hash( $_, $hash, $prefix . "  " ) }  
 }  
3755    
3756    my %with_left_line = ( space  => 'half_l',
3757                           horiz  => 'horiz',
3758                           vert   => 'tee_l',
3759                           el_d_r => 'tee_d',
3760                           el_u_r => 'tee_u',
3761                           el_d_l => 'el_d_l',
3762                           el_u_l => 'el_u_l',
3763                           tee_l  => 'tee_l',
3764                           tee_r  => 'cross',
3765                           tee_u  => 'tee_u',
3766                           tee_d  => 'tee_d',
3767                           half_l => 'half_l',
3768                           half_r => 'horiz',
3769                           half_u => 'el_u_l',
3770                           half_d => 'el_d_l',
3771                           cross  => 'cross',
3772                         );
3773    
3774  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3775  #  $line = text_tree_row( $node, $hash, $row, $line, $symb )  #  Produce a description of one line of a printer plot tree.
3776    #
3777    #  \@line = text_tree_row( $node, $hash, $row, \@line, $symb, $ilbl )
3778    #
3779    #     \@line is the character descriptions accumulated so far, one per array
3780    #          element, except for a label, which can be any number of characters.
3781    #          Labels are followed by an empty string, so if $line->[-1] eq '',
3782    #          then $line->[-2] is a label. The calling program translates the
3783    #          symbol names to output characters.
3784    #
3785    #     \@node is a newick tree node
3786    #     \%hash contains tree layout information
3787    #      $row  is the row number (y value) that we are building
3788    #      $symb is the plot symbol proposed for the current x and y position
3789    #      $ilbl is true if internal node labels are allowed
3790    #
3791  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3792  sub text_tree_row {  sub text_tree_row
3793      my ( $node, $hash, $row, $line, $symb ) = @_;  {
3794        my ( $node, $hash, $row, $line, $symb, $ilbl ) = @_;
3795    
3796      my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };      my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };
3797      if ( $row < $y1 || $row > $y2 ) { return $line }      if ( $row < $y1 || $row > $y2 ) { return $line }
3798    
3799      if ( length( $line ) < $x0 ) { $line .= " " x ( $x0 - length( $line ) ) }      if ( @$line < $x0 ) { push @$line, ('space') x ( $x0 - @$line ) }
3800    
3801      if ( $row == $y ) {      if ( $row == $y ) {
3802          $line = substr( $line, 0, $x0 ) . $symb . (( $x > $x0 ) ? "-" x ($x - $x0) : "");          while ( @$line > $x0 ) { pop @$line }  # Actually 0-1 times
3803            push @$line, $symb,
3804                         ( ( $x > $x0 ) ? ('horiz') x ($x - $x0) : () );
3805      }      }
3806    
3807      elsif ( $row > $yn1 && $row < $yn2 ) {      elsif ( $row > $yn1 && $row < $yn2 ) {
3808          if ( length( $line ) < $x ) { $line .= " " x ( $x - length( $line ) ) . "|" }          if ( @$line < $x ) { push @$line, ('space') x ( $x - @$line ), 'vert' }
3809          else { substr( $line, $x ) = "|" }          else               { $line->[$x] = 'vert' }
3810      }      }
3811    
3812      my @dl = newick_desc_list( $node );      my @dl = newick_desc_list( $node );
3813    
3814      if ( @dl < 1 ) {      if ( @dl < 1 ) { push @$line, ( newick_lbl( $node ) || '' ), '' }
         $line .= " " . $node->[1];  
     }  
3815    
3816      else {      else {
3817          my @list = map { [ $_, "+" ] } @dl;  #  Print symbol for line          my @list = map { [ $_, 'tee_r' ] } @dl;  # Line to the right
3818          $list[ 0]->[1] = "/";          if ( @list > 1 ) { #  Fix top and bottom sympbols
3819          $list[-1]->[1] = "\\";              $list[ 0]->[1] = 'el_d_r';
3820                $list[-1]->[1] = 'el_u_r';
3821            }
3822            elsif ( @list ) {  # Only one descendent
3823                $list[ 0]->[1] = 'half_r';
3824            }
3825          foreach ( @list ) {          foreach ( @list ) {
3826              my ( $n, $s ) = @$_;              my ( $n, $s ) = @$_;
3827              if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {              if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {
3828                  $line = text_tree_row( $n, $hash, $row, $line, $s );                  $line = text_tree_row( $n, $hash, $row, $line, $s, $ilbl );
3829              }              }
3830           }           }
3831    
3832          if ( $row == $y ) { substr( $line, $x, 1 ) = "+" }          if ( $row == $y ) {
3833                $line->[$x] = ( $line->[$x] eq 'horiz' ) ? 'tee_l'
3834                                                         : $with_left_line{ $line->[$x] };
3835                push( @$line, newick_lbl( $node ), '' ) if $ilbl && newick_lbl( $node );
3836            }
3837      }      }
3838    
3839      return $line;      return $line;
3840  }  }
3841    
3842    
3843    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3844    #  Debug routine
3845    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3846    sub dump_tree {
3847        my ( $node, $prefix ) = @_;
3848        defined( $prefix ) or $prefix = "";
3849        print STDERR $prefix, join(", ", @$node), "\n";
3850        my @dl = $node->[0] ? @{$node->[0]} : ();
3851        foreach ( @dl ) { dump_tree( $_, $prefix . "  " ) }
3852        $prefix or print STDERR "\n";
3853    }
3854    
3855    
3856    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3857    #  Debug routine
3858    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3859    sub dump_tree_hash {
3860        my ( $node, $hash, $prefix ) = @_;
3861        defined( $prefix ) or print STDERR "node; [ y1, y2, x0, x, y, yn1, yn2 ]\n" and $prefix = "";
3862        print STDERR $prefix, join(", ", @$node), "; ", join(", ", @{ $hash->{ $node } } ), "\n";
3863        my @dl = $node->[0] ? @{$node->[0]} : ();
3864        foreach (@dl) { dump_tree_hash( $_, $hash, $prefix . "  " ) }
3865    }
3866    
3867    
3868  #===============================================================================  #===============================================================================
3869  #  Open an input file stream:  #  Open an input file stream:
3870  #  #
# Line 3012  Line 3906 
3906      return undef;      return undef;
3907  }  }
3908    
3909    
3910    #===============================================================================
3911    #  Some subroutines copied from gjolists
3912    #===============================================================================
3913    #  Return the common prefix of two lists:
3914    #
3915    #  @common = common_prefix( \@list1, \@list2 )
3916    #-----------------------------------------------------------------------------
3917    sub common_prefix
3918    {
3919        my ($l1, $l2) = @_;
3920        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
3921        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
3922    
3923        my $i = 0;
3924        my $l1_i;
3925        while ( defined( $l1_i = $l1->[$i] ) && $l1_i eq $l2->[$i] ) { $i++ }
3926    
3927        return @$l1[ 0 .. ($i-1) ];  # perl handles negative range
3928    }
3929    
3930    
3931    #-----------------------------------------------------------------------------
3932    #  Return the unique suffixes of each of two lists:
3933    #
3934    #  ( \@suffix1, \@suffix2 ) = unique_suffixes( \@list1, \@list2 )
3935    #-----------------------------------------------------------------------------
3936    sub unique_suffixes
3937    {
3938        my ($l1, $l2) = @_;
3939        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
3940        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
3941    
3942        my $i = 0;
3943        my @l1 = @$l1;
3944        my @l2 = @$l2;
3945        my $l1_i;
3946        while ( defined( $l1_i = $l1[$i] ) && $l1_i eq $l2[$i] ) { $i++ }
3947    
3948        splice @l1, 0, $i;
3949        splice @l2, 0, $i;
3950        return ( \@l1, \@l2 );
3951    }
3952    
3953    
3954    #-------------------------------------------------------------------------------
3955    #  List of values duplicated in a list (stable in order by second occurance):
3956    #
3957    #  @dups = duplicates( @list )
3958    #-------------------------------------------------------------------------------
3959    sub duplicates
3960    {
3961        my %cnt = ();
3962        grep { ++$cnt{$_} == 2 } @_;
3963    }
3964    
3965    
3966    #-------------------------------------------------------------------------------
3967    #  Randomize the order of a list:
3968    #
3969    #  @random = random_order( @list )
3970    #-------------------------------------------------------------------------------
3971    sub random_order
3972    {
3973        my ( $i, $j );
3974        for ( $i = @_ - 1; $i > 0; $i-- )
3975        {
3976            $j = int( ($i+1) * rand() );
3977            ( $_[$i], $_[$j] ) = ( $_[$j], $_[$i] ); # Interchange i and j
3978        }
3979    
3980       @_;
3981    }
3982    
3983    
3984    #-----------------------------------------------------------------------------
3985    #  Intersection of two or more sets:
3986    #
3987    #  @intersection = intersection( \@set1, \@set2, ... )
3988    #-----------------------------------------------------------------------------
3989    sub intersection
3990    {
3991        my $set = shift;
3992        my @intersection = @$set;
3993    
3994        foreach $set ( @_ )
3995        {
3996            my %set = map { $_ => 1 } @$set;
3997            @intersection = grep { exists $set{ $_ } } @intersection;
3998        }
3999    
4000        @intersection;
4001    }
4002    
4003    
4004    #-----------------------------------------------------------------------------
4005    #  Elements in set 1, but not set 2:
4006    #
4007    #  @difference = set_difference( \@set1, \@set2 )
4008    #-----------------------------------------------------------------------------
4009    sub set_difference
4010    {
4011        my ($set1, $set2) = @_;
4012        my %set2 = map { $_ => 1 } @$set2;
4013        grep { ! ( exists $set2{$_} ) } @$set1;
4014    }
4015    
4016    
4017  1;  1;

Legend:
Removed from v.1.8  
changed lines
  Added in v.1.20

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3