[Bio] / FigKernelPackages / gjonewicklib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjonewicklib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.8, Sun Feb 11 18:25:48 2007 UTC revision 1.23, Sat Sep 18 20:10:10 2010 UTC
# Line 1  Line 1 
1    # This is a SAS component.
2    
3  #  #
4  # Copyright (c) 2003-2007 University of Chicago and Fellowship  # Copyright (c) 2003-2010 University of Chicago and Fellowship
5  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
6  #  #
7  # This file is part of the SEED Toolkit.  # This file is part of the SEED Toolkit.
# Line 77  Line 79 
79  #  Tree format interconversion:  #  Tree format interconversion:
80  #===============================================================================  #===============================================================================
81  #  #
82    #  $bool      = is_overbeek_tree( $tree )
83    #  $bool      = is_gjonewick_tree( $tree )
84    #
85  #  $gjonewick = overbeek_to_gjonewick( $overbeek )  #  $gjonewick = overbeek_to_gjonewick( $overbeek )
86  #  $overbeek  = gjonewick_to_overbeek( $gjonewick )  #  $overbeek  = gjonewick_to_overbeek( $gjonewick )
87  #  #
# Line 107  Line 112 
112  #  set_newick_c4( $noderef, $listref )  #  set_newick_c4( $noderef, $listref )
113  #  set_newick_c5( $noderef, $listref )  #  set_newick_c5( $noderef, $listref )
114  #  set_newick_desc_list( $noderef, @desclist )  #  set_newick_desc_list( $noderef, @desclist )
115  #  set_newick_desc_i( $noderef1, $i, $noderef2 )  #  set_newick_desc_i( $noderef1, $i, $noderef2 )  # 1-based numbering
116  #  #
117  #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid  #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid
118  #  #
# Line 118  Line 123 
123  #  #
124  #  $n       = newick_tip_count( $noderef )  #  $n       = newick_tip_count( $noderef )
125  #  @tiprefs = newick_tip_ref_list( $noderef )  #  @tiprefs = newick_tip_ref_list( $noderef )
126    # \@tiprefs = newick_tip_ref_list( $noderef )
127  #  @tips    = newick_tip_list( $noderef )  #  @tips    = newick_tip_list( $noderef )
128    # \@tips    = newick_tip_list( $noderef )
129    #
130  #  $tipref  = newick_first_tip_ref( $noderef )  #  $tipref  = newick_first_tip_ref( $noderef )
131  #  $tip     = newick_first_tip( $noderef )  #  $tip     = newick_first_tip( $noderef )
132    #
133  #  @tips    = newick_duplicated_tips( $noderef )  #  @tips    = newick_duplicated_tips( $noderef )
134    # \@tips    = newick_duplicated_tips( $noderef )
135    #
136  #  $bool    = newick_tip_in_tree( $noderef, $tipname )  #  $bool    = newick_tip_in_tree( $noderef, $tipname )
137    #
138  #  @tips    = newick_shared_tips( $tree1, $tree2 )  #  @tips    = newick_shared_tips( $tree1, $tree2 )
139    # \@tips    = newick_shared_tips( $tree1, $tree2 )
140  #  #
141  #  $length  = newick_tree_length( $noderef )  #  $length  = newick_tree_length( $noderef )
142    #
143    #  %tip_distances = newick_tip_distances( $noderef )
144    # \%tip_distances = newick_tip_distances( $noderef )
145    #
146  #  $xmax    = newick_max_X( $noderef )  #  $xmax    = newick_max_X( $noderef )
147  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )
148  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )  #  ( $tipname, $xmax ) = newick_most_distant_tip_name( $noderef )
149    #
150    #  Provide a standard name by which two trees can be compared for same topology
151    #
152    #  $stdname = std_tree_name( $tree )
153  #  #
154  #  Tree tip insertion point (tip is on branch of length x that  #  Tree tip insertion point (tip is on branch of length x that
155  #  is inserted into branch connecting node1 and node2, a distance  #  is inserted into branch connecting node1 and node2, a distance
156  #  x1 from node1 and x2 from node2):  #  x1 from node1 and x2 from node2):
157  #  #
158  #  [ $node1, $x1, $node2, $x2, $x ]  #  [ $node1, $x1, $node2, $x2, $x ] = newick_tip_insertion_point( $tree, $tip )
 #           = newick_tip_insertion_point( $tree, $tip )  
159  #  #
160  #  Standardized label for a node in terms of intersection of 3 lowest sorting  #  Standardized label for a node in terms of intersection of 3 lowest sorting
161  #  tips (sort is lower case):  #  tips (sort is lower case):
162  #  #
163  #  @TipOrTips = std_node_name( $Tree, $Node )  #  @TipOrTips = std_node_name( $tree, $node )
164  #  #
165  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
166  #  Paths from root of tree:  #  Paths from root of tree:
# Line 189  Line 209 
209  #  $n_changed = newick_set_all_branches( $node, $x )  #  $n_changed = newick_set_all_branches( $node, $x )
210  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
211  #  $node      = newick_rescale_branches( $node, $factor )  #  $node      = newick_rescale_branches( $node, $factor )
212    #  $node      = newick_modify_branches( $node, \&function )
213    #  $node      = newick_modify_branches( $node, \&function, \@func_parms )
214  #  #
215  #  Modify comments:  #  Modify comments:
216  #  #
# Line 206  Line 228 
228  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )
229  #  $newtree = reroot_newick_to_node( $tree, @node )  #  $newtree = reroot_newick_to_node( $tree, @node )
230  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
231  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
232    #  $newtree = reroot_newick_to_midpoint( $tree )           # unweighted
233    #  $newtree = reroot_newick_to_midpoint_w( $tree )         # weight by tips
234  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted
235  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips
236  #  $newtree = uproot_tip_rooted_newick( $tree )  #  $newtree = uproot_tip_rooted_newick( $tree )
237  #  $newtree = uproot_newick( $tree )  #  $newtree = uproot_newick( $tree )
238  #  #
239  #  $newtree = prune_from_newick( $tree, $tip )  #  $newtree = prune_from_newick( $tree, $tip )
240    #  $newtree = rooted_newick_subtree( $tree,  @tips )
241    #  $newtree = rooted_newick_subtree( $tree, \@tips )
242  #  $newtree = newick_subtree( $tree,  @tips )  #  $newtree = newick_subtree( $tree,  @tips )
243  #  $newtree = newick_subtree( $tree, \@tips )  #  $newtree = newick_subtree( $tree, \@tips )
244    #  $newtree = newick_covering_subtree( $tree,  @tips )
245    #  $newtree = newick_covering_subtree( $tree, \@tips )
246  #  #
247  #  $newtree = collapse_zero_length_branches( $tree )  #  $newtree = collapse_zero_length_branches( $tree )
248  #  #
# Line 222  Line 250 
250  #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )  #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
251  #  #
252  #===============================================================================  #===============================================================================
253    #  Tree neighborhood: subtree of n tips to represent a larger tree.
254    #===============================================================================
255    #
256    #  Focus around root:
257    #
258    #  $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
259    #  $subtree = root_neighborhood_representative_tree( $tree, $n )
260    #  @tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
261    #  @tips    = root_neighborhood_representative_tips( $tree, $n )
262    # \@tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
263    # \@tips    = root_neighborhood_representative_tips( $tree, $n )
264    #
265    #  Focus around a tip insertion point (the tip is not in the subtree):
266    #
267    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
268    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
269    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
270    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
271    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
272    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
273    #
274    #===============================================================================
275    #  Random trees
276    #===============================================================================
277    #
278    #   $tree = random_equibranch_tree(  @tips, \%options )
279    #   $tree = random_equibranch_tree( \@tips, \%options )
280    #   $tree = random_equibranch_tree(  @tips )
281    #   $tree = random_equibranch_tree( \@tips )
282    #
283    #   $tree = random_ultrametric_tree(  @tips, \%options )
284    #   $tree = random_ultrametric_tree( \@tips, \%options )
285    #   $tree = random_ultrametric_tree(  @tips )
286    #   $tree = random_ultrametric_tree( \@tips )
287    #
288    #===============================================================================
289  #  Tree reading and writing:  #  Tree reading and writing:
290  #===============================================================================  #===============================================================================
291    #  Write machine-readable trees:
292  #  #
293  #   writeNewickTree( $tree )  #   writeNewickTree( $tree )
294  #   writeNewickTree( $tree, $file )  #   writeNewickTree( $tree, $file )
# Line 231  Line 296 
296  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O
297  #  $treestring = swriteNewickTree( $tree )  #  $treestring = swriteNewickTree( $tree )
298  #  $treestring = formatNewickTree( $tree )  #  $treestring = formatNewickTree( $tree )
299    #
300    #  Write human-readable trees:
301    #
302  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )
303  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )
304  #  #
305  #  $tree  = read_newick_tree( $file )  # reads to a semicolon  #  Read trees:
306  #  @trees = read_newick_trees( $file ) # reads to end of file  #
307    #  $tree  = read_newick_tree( )
308    #  $tree  = read_newick_tree( \*FH )
309    #  $tree  = read_newick_tree( $file )
310    #
311    #  @trees = read_newick_trees( )
312    #  @trees = read_newick_trees( \*FH )
313    #  @trees = read_newick_trees( $file )
314    #
315  #  $tree  = parse_newick_tree_str( $string )  #  $tree  = parse_newick_tree_str( $string )
316  #  #
317  #===============================================================================  #===============================================================================
# Line 243  Line 319 
319    
320  use Carp;  use Carp;
321  use Data::Dumper;  use Data::Dumper;
322    use strict;
323    
324  require Exporter;  require Exporter;
325    
326  our @ISA = qw(Exporter);  our @ISA = qw(Exporter);
327  our @EXPORT = qw(  our @EXPORT = qw(
328            is_overbeek_tree
329            is_gjonewick_tree
330          overbeek_to_gjonewick          overbeek_to_gjonewick
331          gjonewick_to_overbeek          gjonewick_to_overbeek
   
332          newick_is_valid          newick_is_valid
333          newick_is_rooted          newick_is_rooted
334          newick_is_unrooted          newick_is_unrooted
335          tree_rooted_on_tip          tree_rooted_on_tip
336          newick_is_bifurcating          newick_is_bifurcating
337          newick_tip_count          newick_tip_count
338            newick_tip_ref_list
339          newick_tip_list          newick_tip_list
340    
341          newick_first_tip          newick_first_tip
342          newick_duplicated_tips          newick_duplicated_tips
343          newick_tip_in_tree          newick_tip_in_tree
344          newick_shared_tips          newick_shared_tips
345    
346          newick_tree_length          newick_tree_length
347            newick_tip_distances
348          newick_max_X          newick_max_X
349          newick_most_distant_tip_ref          newick_most_distant_tip_ref
350          newick_most_distant_tip_name          newick_most_distant_tip_name
351    
352          std_newick_name          newick_tip_insertion_point
353    
354            std_tree_name
355    
356          path_to_tip          path_to_tip
357          path_to_named_node          path_to_named_node
# Line 290  Line 373 
373          newick_set_all_branches          newick_set_all_branches
374          newick_fix_negative_branches          newick_fix_negative_branches
375          newick_rescale_branches          newick_rescale_branches
376            newick_modify_branches
377    
378          newick_strip_comments          newick_strip_comments
379    
# Line 305  Line 389 
389          reroot_newick_next_to_tip          reroot_newick_next_to_tip
390          reroot_newick_to_node          reroot_newick_to_node
391          reroot_newick_to_node_ref          reroot_newick_to_node_ref
392            reroot_newick_between_nodes
393            reroot_newick_to_midpoint
394            reroot_newick_to_midpoint_w
395          reroot_newick_to_approx_midpoint          reroot_newick_to_approx_midpoint
396          reroot_newick_to_approx_midpoint_w          reroot_newick_to_approx_midpoint_w
397          uproot_tip_rooted_newick          uproot_tip_rooted_newick
398          uproot_newick          uproot_newick
399    
400          prune_from_newick          prune_from_newick
401            rooted_newick_subtree
402          newick_subtree          newick_subtree
403            newick_covering_subtree
404          collapse_zero_length_branches          collapse_zero_length_branches
405    
406          newick_insert_at_node          newick_insert_at_node
407          newick_insert_between_nodes          newick_insert_between_nodes
408    
409            root_neighborhood_representative_tree
410            root_neighborhood_representative_tips
411            tip_neighborhood_representative_tree
412            tip_neighborhood_representative_tips
413    
414            random_equibranch_tree
415            random_ultrametric_tree
416    
417          writeNewickTree          writeNewickTree
418          fwriteNewickTree          fwriteNewickTree
419          strNewickTree          strNewickTree
# Line 362  Line 459 
459          );          );
460    
461    
 use gjolists qw(  
         common_prefix  
         unique_suffixes  
   
         duplicates  
         random_order  
   
         intersection  
         set_difference  
         );  
   
   
 use strict;  
   
   
462  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
463  #  Internally used definitions  #  Internally used definitions
464  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
465    
466  sub array_ref { ref( $_[0] ) eq "ARRAY" }  sub array_ref { $_[0] && ref( $_[0] ) eq 'ARRAY' }
467  sub hash_ref  { ref( $_[0] ) eq "HASH"  }  sub hash_ref  { $_[0] && ref( $_[0] ) eq 'HASH'  }
468    
469    
470  #===============================================================================  #===============================================================================
471  #  Interconvert Overbeek and gjonewick trees:  #  Interconvert overbeek and gjonewick trees:
472  #===============================================================================  #===============================================================================
473    
474    sub is_overbeek_tree { array_ref( $_[0] ) && array_ref( $_[0]->[2] ) }
475    
476    sub is_gjonewick_tree { array_ref( $_[0] ) && array_ref( $_[0]->[0] ) }
477    
478  sub overbeek_to_gjonewick  sub overbeek_to_gjonewick
479  {  {
480      return () unless ref( $_[0] ) eq 'ARRAY';      return () unless ref( $_[0] ) eq 'ARRAY';
# Line 408  Line 494 
494      return $node;      return $node;
495  }  }
496    
497    
498  #===============================================================================  #===============================================================================
499  #  Extract tree structure values:  #  Extract tree structure values:
500  #===============================================================================  #===============================================================================
# Line 428  Line 515 
515  #  #
516  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
517    
518  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]  sub newick_desc_ref { ref($_[0]) ? $_[0]->[0] : Carp::confess() }  # = ${$_[0]}[0]
519  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }
520  sub newick_x        { $_[0]->[2] }  sub newick_x        { ref($_[0]) ? $_[0]->[2] : Carp::confess() }
521  sub newick_c1       { $_[0]->[3] }  sub newick_c1       { ref($_[0]) ? $_[0]->[3] : Carp::confess() }
522  sub newick_c2       { $_[0]->[4] }  sub newick_c2       { ref($_[0]) ? $_[0]->[4] : Carp::confess() }
523  sub newick_c3       { $_[0]->[5] }  sub newick_c3       { ref($_[0]) ? $_[0]->[5] : Carp::confess() }
524  sub newick_c4       { $_[0]->[6] }  sub newick_c4       { ref($_[0]) ? $_[0]->[6] : Carp::confess() }
525  sub newick_c5       { $_[0]->[7] }  sub newick_c5       { ref($_[0]) ? $_[0]->[7] : Carp::confess() }
526    
527  sub newick_desc_list {  sub newick_desc_list {
528      my $node = $_[0];      my $node = $_[0];
529      ! array_ref( $node      ) ? undef           :      array_ref( $node ) && array_ref( $node->[0] ) ? @{ $node->[0] } : ();
       array_ref( $node->[0] ) ? @{ $node->[0] } :  
                                 ()              ;  
530  }  }
531    
532  sub newick_n_desc {  sub newick_n_desc {
533      my $node = $_[0];      my $node = $_[0];
534      ! array_ref( $node      ) ? undef                  :      array_ref( $node ) && array_ref( $node->[0] ) ? scalar @{ $node->[0] } : 0;
       array_ref( $node->[0] ) ? scalar @{ $node->[0] } :  
                                 0                      ;  
535  }  }
536    
537  sub newick_desc_i {  sub newick_desc_i {
538      my ( $node, $i ) = @_;      my ( $node, $i ) = @_;
539      ! array_ref( $node      ) ? undef              :      array_ref( $node ) && $i && array_ref( $node->[0] ) ? $node->[0]->[$i-1] : undef;
       array_ref( $node->[0] ) ? $node->[0]->[$i-1] :  
                                 undef              ;  
540  }  }
541    
542  sub node_is_tip {  sub node_is_tip {
# Line 641  Line 722 
722  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
723  #  List of tip nodes:  #  List of tip nodes:
724  #  #
725  #  @tips = newick_tip_ref_list( $node )  #  @tips = newick_tip_ref_list( $noderef )
726    # \@tips = newick_tip_ref_list( $noderef )
727  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
728  sub newick_tip_ref_list {  sub newick_tip_ref_list {
729      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
# Line 658  Line 740 
740          push @list, newick_tip_ref_list( $_, 1 );          push @list, newick_tip_ref_list( $_, 1 );
741      }      }
742    
743      @list;      wantarray ? @list : \@list;
744  }  }
745    
746    
# Line 666  Line 748 
748  #  List of tips:  #  List of tips:
749  #  #
750  #  @tips = newick_tip_list( $node )  #  @tips = newick_tip_list( $node )
751    # \@tips = newick_tip_list( $node )
752  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
753  sub newick_tip_list {  sub newick_tip_list {
754      map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );      my @tips = map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );
755        wantarray ? @tips : \@tips;
756  }  }
757    
758    
# Line 707  Line 791 
791  #  List of duplicated tip labels.  #  List of duplicated tip labels.
792  #  #
793  #  @tips = newick_duplicated_tips( $node )  #  @tips = newick_duplicated_tips( $node )
794    # \@tips = newick_duplicated_tips( $node )
795  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
796  sub newick_duplicated_tips {  sub newick_duplicated_tips {
797      gjolists::duplicates( newick_tip_list( $_[0] ) );      my @tips = &duplicates( newick_tip_list( $_[0] ) );
798        wantarray ? @tips : \@tips;
799  }  }
800    
801    
# Line 740  Line 826 
826  #  Tips shared between 2 trees.  #  Tips shared between 2 trees.
827  #  #
828  #  @tips = newick_shared_tips( $tree1, $tree2 )  #  @tips = newick_shared_tips( $tree1, $tree2 )
829    # \@tips = newick_shared_tips( $tree1, $tree2 )
830  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
831  sub newick_shared_tips {  sub newick_shared_tips {
832      my ( $Tree1, $Tree2 ) = @_;      my ( $tree1, $tree2 ) = @_;
833      my ( @Tips1 ) = newick_tip_list( $Tree1 );      my $tips1 = newick_tip_list( $tree1 );
834      my ( @Tips2 ) = newick_tip_list( $Tree2 );      my $tips2 = newick_tip_list( $tree2 );
835      gjolists::intersection( \@Tips1, \@Tips2 );      my @tips = &intersection( $tips1, $tips2 );
836        wantarray ? @tips : \@tips;
837  }  }
838    
839    
# Line 767  Line 855 
855    
856    
857  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
858    #  Hash of tip nodes and corresponding distances from root:
859    #
860    #   %tip_distances = newick_tip_distances( $node )
861    #  \%tip_distances = newick_tip_distances( $node )
862    #-------------------------------------------------------------------------------
863    sub newick_tip_distances
864    {
865        my ( $node, $x, $hash ) = @_;
866        my $root = ! $hash;
867        ref( $hash ) eq 'HASH' or $hash = {};
868    
869        $x ||= 0;
870        $x  += newick_x( $node ) || 0;
871    
872        #  Is it a tip?
873    
874        my $n_desc = newick_n_desc( $node );
875        if ( ! $n_desc )
876        {
877            $hash->{ newick_lbl( $node ) } = $x;
878            return $hash;
879        }
880    
881        #  Tree rooted on tip?
882    
883        if ( ( $n_desc == 1 ) && $root && ( newick_lbl( $node ) ) )
884        {
885            $hash->{ newick_lbl( $node ) } = 0;  # Distance to root is zero
886        }
887    
888        foreach ( newick_desc_list( $node ) )
889        {
890            newick_tip_distances( $_, $x, $hash );
891        }
892    
893        wantarray ? %$hash : $hash;
894    }
895    
896    
897    #-------------------------------------------------------------------------------
898  #  Tree max X.  #  Tree max X.
899  #  #
900  #  $xmax = newick_max_X( $node )  #  $xmax = newick_max_X( $node )
# Line 910  Line 1038 
1038    
1039      else      else
1040      {      {
1041          my ( $n1, $x1 ) = describe_desc( $dl->[0] );          my ( $n1, $x1 ) = describe_descendant( $dl->[0] );
1042          my ( $n2, $x2 ) = describe_desc( $dl->[1] );          my ( $n2, $x2 ) = describe_descendant( $dl->[1] );
1043    
1044          if ( @$n1 == 2 ) { push @$n1, $n2->[0] }          if ( @$n1 == 2 ) { push @$n1, $n2->[0] }
1045          if ( @$n2 == 2 )          if ( @$n2 == 2 )
# Line 926  Line 1054 
1054  }  }
1055    
1056    
1057  sub describe_desc  sub describe_descendant
1058  {  {
1059      my $node = shift;      my $node = shift;
1060    
# Line 951  Line 1079 
1079      #  Return the two lowest of those (the third will come from the      #  Return the two lowest of those (the third will come from the
1080      #  other side of the original node).      #  other side of the original node).
1081    
     else  
     {  
1082          my @rep_tips = sort { lc $a cmp lc $b }          my @rep_tips = sort { lc $a cmp lc $b }
1083                         map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }                         map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }
1084                         @$dl;                         @$dl;
1085          return ( [ @rep_tips[0,1] ], $x );          return ( [ @rep_tips[0,1] ], $x );
1086      }      }
 }  
1087    
1088    
1089  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 967  Line 1092 
1092  #     Three sorted tip labels intersecting at node, each being smallest  #     Three sorted tip labels intersecting at node, each being smallest
1093  #           of all the tips of their subtrees  #           of all the tips of their subtrees
1094  #  #
1095  #  @TipOrTips = std_node_name( $Tree, $Node )  #  @TipOrTips = std_node_name( $tree, $node )
1096  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1097  sub std_node_name {  sub std_node_name {
1098      my $tree = $_[0];      my $tree = $_[0];
# Line 985  Line 1110 
1110      #  @rest, and keeping the best tip for each subtree.      #  @rest, and keeping the best tip for each subtree.
1111    
1112      my @rest = newick_tip_list( $tree );      my @rest = newick_tip_list( $tree );
1113      my @best = map {      my @best = map
1114              {
1115              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );
1116              @rest = gjolists::set_difference( \@rest, \@tips );              @rest = &set_difference( \@rest, \@tips );
1117              $tips[0];              $tips[0];
1118          } newick_desc_list( $noderef );          } newick_desc_list( $noderef );
1119    
# Line 1075  Line 1201 
1201      my $imax = newick_n_desc( $node );      my $imax = newick_n_desc( $node );
1202      for ( my $i = 1; $i <= $imax; $i++ ) {      for ( my $i = 1; $i <= $imax; $i++ ) {
1203         @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );         @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );
1204         if ( @path ) { return @path }          return @path if @path;
1205      }      }
1206    
1207      ();  #  Not found      ();  #  Not found
# Line 1106  Line 1232 
1232      @p2 && @p3 || return ();                             #  Were they found?      @p2 && @p3 || return ();                             #  Were they found?
1233    
1234      # Find the common prefix for each pair of paths      # Find the common prefix for each pair of paths
1235      my @p12 = gjolists::common_prefix( \@p1, \@p2 );      my @p12 = &common_prefix( \@p1, \@p2 );
1236      my @p13 = gjolists::common_prefix( \@p1, \@p3 );      my @p13 = &common_prefix( \@p1, \@p3 );
1237      my @p23 = gjolists::common_prefix( \@p2, \@p3 );      my @p23 = &common_prefix( \@p2, \@p3 );
1238    
1239      # Return the longest common prefix of any two paths      # Return the longest common prefix of any two paths
1240      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :
# Line 1159  Line 1285 
1285      @p1 && @p2 || return undef;                          # Were they found?      @p1 && @p2 || return undef;                          # Were they found?
1286    
1287      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1288      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1289      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1290      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1291    
# Line 1184  Line 1310 
1310      my @p2 = path_to_node( $node, $node2 ) or return undef;      my @p2 = path_to_node( $node, $node2 ) or return undef;
1311    
1312      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1313      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1314      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1315      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1316    
# Line 1435  Line 1561 
1561    
1562    
1563  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1564    #  Modify all branch lengths by a function.
1565    #
1566    #     $node = newick_modify_branches( $node, \&function )
1567    #     $node = newick_modify_branches( $node, \&function, \@func_parms )
1568    #
1569    #  Function must have form
1570    #
1571    #     $x2 = &$function( $x1 )
1572    #     $x2 = &$function( $x1, @$func_parms )
1573    #
1574    #-------------------------------------------------------------------------------
1575    sub newick_modify_branches {
1576        my ( $node, $func, $parm ) = @_;
1577    
1578        set_newick_x( $node, &$func( newick_x( $node ), ( $parm ? @$parm : () ) ) );
1579        foreach ( newick_desc_list( $node ) )
1580        {
1581            newick_modify_branches( $_, $func, $parm )
1582        }
1583    
1584        $node;
1585    }
1586    
1587    
1588    #-------------------------------------------------------------------------------
1589  #  Set negative branches to zero.  The original tree is modfied.  #  Set negative branches to zero.  The original tree is modfied.
1590  #  #
1591  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
# Line 1523  Line 1674 
1674    
1675    
1676  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1677    #  Standard name for a Newick tree topology
1678    #
1679    #    $stdname = std_tree_name( $tree )
1680    #
1681    #-------------------------------------------------------------------------------
1682    sub std_tree_name
1683    {
1684        my ( $tree ) = @_;
1685        my ( $mintip ) = sort { lc $a cmp lc $b } newick_tip_list( $tree );
1686        ( std_tree_name_2( reroot_newick_next_to_tip( copy_newick_tree( $tree ), $mintip ) ) )[0];
1687    }
1688    
1689    
1690    #
1691    #  ( $name, $mintip ) = std_tree_name_2( $node )
1692    #
1693    sub std_tree_name_2
1694    {
1695        my ( $node ) = @_;
1696    
1697        my @descends = newick_desc_list( $node );
1698        if ( @descends == 0 )
1699        {
1700            my $lbl = newick_lbl( $node );
1701            return ( $lbl, $lbl );
1702        }
1703    
1704        my @list = sort { lc $a->[1] cmp lc $b->[1] || $a->[1] cmp $b->[1] }
1705                   map  { [ std_tree_name_2( $_ ) ] }
1706                   @descends;
1707        my $mintip = $list[0]->[1];
1708        my $name   = '(' . join( "\t", map { $_->[0] } @list ) . ')';
1709    
1710        return ( $name, $mintip );
1711    }
1712    
1713    
1714    #-------------------------------------------------------------------------------
1715  #  Move largest groups to periphery of tree (in place).  #  Move largest groups to periphery of tree (in place).
1716  #  #
1717  #      dir  <= -2 for up-sweeping tree (big groups always first),  #      dir  <= -2 for up-sweeping tree (big groups always first),
# Line 1582  Line 1771 
1771      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
1772      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip
1773    
     #  Reorder this subtree:  
   
1774      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1775      if    ( $dir < 0 ) {                   #  Big group first  
1776          @$dl_ref = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;      #  Reorder this subtree (biggest subtrees to outside)
1777    
1778        if ( $dir )
1779        {
1780            #  Big group first
1781            my @dl = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;
1782    
1783            my ( @dl1, @dl2 );
1784            for ( my $i = 0; $i < $nd; $i++ ) {
1785                if ( $i & 1 ) { push @dl2, $dl[$i] } else { push @dl1, $dl[$i] }
1786      }      }
1787      elsif ( $dir > 0 ) {                   #  Small group first  
1788          @$dl_ref = sort { $cntref->{$a} <=> $cntref->{$b} } @$dl_ref;          @$dl_ref = ( $dir < 0 ) ? ( @dl1, reverse @dl2 )
1789                                    : ( @dl2, reverse @dl1 );
1790      }      }
1791    
1792      #  Reorder within descendant subtrees:      #  Reorder within descendant subtrees:
# Line 1621  Line 1818 
1818  #  #
1819  #  $tree = unaesthetic_newick_tree( $treeref, $dir )  #  $tree = unaesthetic_newick_tree( $treeref, $dir )
1820  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1821  sub unaesthetic_newick_tree {  sub unaesthetic_newick_tree
1822    {
1823      my ( $tree, $dir ) = @_;      my ( $tree, $dir ) = @_;
1824      my %cnt;      my %cnt;
1825    
# Line 1641  Line 1839 
1839  #           = 0 for no change, and  #           = 0 for no change, and
1840  #           > 0 for downward branch (small group first).  #           > 0 for downward branch (small group first).
1841  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1842  sub reorder_against_tip_count {  sub reorder_against_tip_count
1843    {
1844      my ( $node, $cntref, $dir ) = @_;      my ( $node, $cntref, $dir ) = @_;
1845    
1846      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1680  Line 1879 
1879  #  #
1880  #  $tree = random_order_newick_tree( $tree )  #  $tree = random_order_newick_tree( $tree )
1881  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1882  sub random_order_newick_tree {  sub random_order_newick_tree
1883    {
1884      my ( $node ) = @_;      my ( $node ) = @_;
1885    
1886      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1689  Line 1889 
1889      #  Reorder this subtree:      #  Reorder this subtree:
1890    
1891      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1892      @$dl_ref = gjolists::random_order( @$dl_ref );      @$dl_ref = &random_order( @$dl_ref );
1893    
1894      #  Reorder descendants:      #  Reorder descendants:
1895    
# Line 1704  Line 1904 
1904  #  #
1905  #  $newtree = reroot_newick_by_path( @path )  #  $newtree = reroot_newick_by_path( @path )
1906  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1907  sub reroot_newick_by_path {  sub reroot_newick_by_path
1908    {
1909      my ( $node1, $path1, @rest ) = @_;      my ( $node1, $path1, @rest ) = @_;
1910      array_ref( $node1 ) || return undef;      #  Always expect a node      array_ref( $node1 ) || return undef;      #  Always expect a node
1911    
# Line 1812  Line 2013 
2013    
2014    
2015  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2016    #  Reroot a newick tree along the path between 2 nodes:
2017    #
2018    #  $tree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
2019    #-------------------------------------------------------------------------------
2020    sub reroot_newick_between_nodes
2021    {
2022        my ( $tree, $node1, $node2, $fraction ) = @_;
2023        array_ref( $tree ) or return undef;
2024        $fraction >= 0 && $fraction <= 1 or return undef;
2025    
2026        #  Find the paths to the nodes:
2027    
2028        my @path1 = path_to_node( $tree, $node1 ) or return undef;
2029        my @path2 = path_to_node( $tree, $node2 ) or return undef;
2030    
2031        reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction )
2032    }
2033    
2034    
2035    #-------------------------------------------------------------------------------
2036    #  Reroot a newick tree along the path between 2 nodes:
2037    #
2038    #  $tree = reroot_newick_between_node_refs( $tree, $node1, $node2, $fraction )
2039    #-------------------------------------------------------------------------------
2040    sub reroot_newick_between_node_refs
2041    {
2042        my ( $tree, $node1, $node2, $fraction ) = @_;
2043        array_ref( $tree ) or return undef;
2044    
2045        #  Find the paths to the nodes:
2046    
2047        my @path1 = path_to_node_ref( $tree, $node1 ) or return undef;
2048        my @path2 = path_to_node_ref( $tree, $node2 ) or return undef;;
2049    
2050        reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction )
2051    }
2052    
2053    
2054    #-------------------------------------------------------------------------------
2055    #  Reroot a newick tree along the path between 2 nodes defined by paths:
2056    #
2057    #  $tree = reroot_newick_between_nodes_by_path( $tree, $path1, $path2, $fraction )
2058    #-------------------------------------------------------------------------------
2059    sub reroot_newick_between_nodes_by_path
2060    {
2061        my ( $tree, $path1, $path2, $fraction ) = @_;
2062        array_ref( $tree ) and array_ref( $path1 ) and  array_ref( $path2 )
2063           or return undef;
2064        $fraction >= 0 && $fraction <= 1 or return undef;
2065    
2066        my @path1 = @$path1;
2067        my @path2 = @$path2;
2068    
2069        #  Trim the common prefix, saving it:
2070    
2071        my @prefix = ();
2072        while ( defined( $path1[1] ) && defined( $path2[1] ) && ( $path1[1] == $path2[1] ) )
2073        {
2074            push @prefix, splice( @path1, 0, 2 );
2075            splice( @path2, 0, 2 );
2076        }
2077    
2078        my ( @path, $dist );
2079        if    ( @path1 < 3 )
2080        {
2081            @path2 >= 3 or return undef;              # node1 = node2
2082            $dist = $fraction * newick_path_length( @path2 );
2083            @path = @path2;
2084        }
2085        elsif ( @path2 < 3 )
2086        {
2087            $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
2088            @path = @path1;
2089        }
2090        else
2091        {
2092            my $dist1 = newick_path_length( @path1 );
2093            my $dist2 = newick_path_length( @path2 );
2094            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2095            @path = ( $dist <= 0 ) ? @path1 : @path2;
2096            $dist = abs( $dist );
2097        }
2098    
2099        #  Descend tree until we reach the insertion branch:
2100    
2101        my $x;
2102        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
2103        {
2104            $dist -= $x;
2105            push @prefix, splice( @path, 0, 2 );
2106        }
2107    
2108        #  Insert the new node:
2109    
2110        my $newnode = [ [ $path[2] ], undef, $dist ];
2111        set_newick_desc_i( $path[0], $path[1], $newnode );
2112        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
2113    
2114        #  We can now build the path from root to the new node
2115    
2116        reroot_newick_by_path( @prefix, @path[0,1], $newnode );
2117    }
2118    
2119    
2120    #-------------------------------------------------------------------------------
2121  #  Move root of tree to an approximate midpoint.  #  Move root of tree to an approximate midpoint.
2122  #  #
2123  #  $newtree = reroot_newick_to_approx_midpoint( $tree )  #  $newtree = reroot_newick_to_approx_midpoint( $tree )
# Line 1823  Line 2129 
2129    
2130      my $dists1 = average_to_tips_1( $tree );      my $dists1 = average_to_tips_1( $tree );
2131    
2132      #  Compile average tip to node distances descending, returning midpoint node      #  Compile average tip to node distances descending, returning midpoint
2133        #  cadidates as a list of [ $node1, $node2, $fraction ]
2134    
2135        my @mids = average_to_tips_2( $dists1, undef, undef );
2136    
2137        #  Reroot to first midpoint candidate
2138    
2139        return $tree if ! @mids;
2140        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2141        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2142    }
2143    
2144    
2145    #-------------------------------------------------------------------------------
2146    #  Move root of tree to a midpoint.
2147    #
2148    #  $newtree = reroot_newick_to_midpoint( $tree )
2149    #-------------------------------------------------------------------------------
2150    sub reroot_newick_to_midpoint {
2151        my ( $tree ) = @_;
2152    
2153        #  Compile average tip to node distances assending
2154    
2155      my $node = average_to_tips_2( $dists1, undef, undef );      my $dists1 = average_to_tips_1( $tree );
2156    
2157        #  Compile average tip to node distances descending, returning midpoint
2158        #  [ $node1, $node2, $fraction ]
2159    
2160      #  Reroot      my @mids = average_to_tips_2( $dists1, undef, undef );
2161    
2162      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree      @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2163  }  }
2164    
2165    
2166    #-------------------------------------------------------------------------------
2167    #  Compile average tip to node distances assending
2168    #-------------------------------------------------------------------------------
2169  sub average_to_tips_1 {  sub average_to_tips_1 {
2170      my ( $node ) = @_;      my ( $node ) = @_;
2171    
# Line 1843  Line 2176 
2176          foreach ( @desc_dists ) { $x_below += $_->[0] }          foreach ( @desc_dists ) { $x_below += $_->[0] }
2177          $x_below /= @desc_dists;          $x_below /= @desc_dists;
2178      }      }
2179    
2180      my $x = newick_x( $node ) || 0;      my $x = newick_x( $node ) || 0;
2181      my $x_net = $x_below + $x;      my $x_net = $x_below + $x;
2182    
# Line 1850  Line 2184 
2184  }  }
2185    
2186    
2187    #-------------------------------------------------------------------------------
2188    #  Compile average tip to node distances descending, returning midpoint as
2189    #  [ $node1, $node2, $fraction_of_dist_between ]
2190    #-------------------------------------------------------------------------------
2191  sub average_to_tips_2 {  sub average_to_tips_2 {
2192      my ( $dists1, $x_above, $anc_node ) = @_;      my ( $dists1, $x_above, $anc_node ) = @_;
2193      my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;      my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;
2194    
2195      #  Are we done?  Root is in this node's branch, or "above"?      #  Are we done?  Root is in this node's branch, or "above"?
2196    
2197      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      my @mids = ();
     # print STDERR "x       = $x\n";  
     # print STDERR "x_below = $x_below\n";  
     # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";  
   
2198      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2199      {      {
2200          #  At this point the root can only be in this node's branch,          #  At this point the root can only be in this node's branch,
# Line 1872  Line 2206 
2206    
2207          if ( ( $x_below + $x ) >= $x_above )          if ( ( $x_below + $x ) >= $x_above )
2208          {          {
2209              return ( $x_above >= $x_below ) ? $anc_node : $node;              #  We will need to make a new node for the root, $fract of
2210          }              #  the way from $node to $anc_node:
2211          else              my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2212          {                                     : 0.5;
2213              return undef;              push @mids, [ $node, $anc_node, $fract ];
2214          }          }
2215      }      }
2216    
2217      #  The root must be somewhere below this node:      #  The root might be somewhere below this node:
2218    
2219      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );      my $n_1      =   @$desc_list - ( $anc_node ? 0 : 1 );
2220      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );      my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );
# Line 1890  Line 2224 
2224          #  If input tree is tip_rooted, $n-1 can be 0, so:          #  If input tree is tip_rooted, $n-1 can be 0, so:
2225    
2226          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;          my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;
2227          my $root = average_to_tips_2( $_, $above2, $node );          push @mids, average_to_tips_2( $_, $above2, $node );
         if ( $root ) { return $root }  
2228      }      }
2229    
2230      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2231  }  }
2232    
2233    
# Line 1907  Line 2238 
2238  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2239  sub reroot_newick_to_approx_midpoint_w {  sub reroot_newick_to_approx_midpoint_w {
2240      my ( $tree ) = @_;      my ( $tree ) = @_;
2241        array_ref( $tree ) or return undef;
2242    
2243        #  Compile average tip to node distances assending from tips
2244    
2245        my $dists1 = average_to_tips_1_w( $tree );
2246    
2247        #  Compile average tip to node distances descending, returning midpoints
2248    
2249        my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2250    
2251        #  Reroot to first midpoint candidate
2252    
2253        return $tree if ! @mids;
2254        my ( $node1, $node2, $fraction ) = @{ $mids[0] };
2255        reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 );
2256    }
2257    
2258    
2259    #-------------------------------------------------------------------------------
2260    #  Move root of tree to an approximate midpoint.  Weight by tips.
2261    #
2262    #  $newtree = reroot_newick_to_midpoint_w( $tree )
2263    #-------------------------------------------------------------------------------
2264    sub reroot_newick_to_midpoint_w {
2265        my ( $tree ) = @_;
2266        array_ref( $tree ) or return ();
2267    
2268      #  Compile average tip to node distances assending      #  Compile average tip to node distances assending
2269    
# Line 1914  Line 2271 
2271    
2272      #  Compile average tip to node distances descending, returning midpoint node      #  Compile average tip to node distances descending, returning midpoint node
2273    
2274      my $node = average_to_tips_2_w( $dists1, undef, undef, undef );      my @mids = average_to_tips_2_w( $dists1, undef, undef, undef );
2275    
2276      #  Reroot      #  Reroot at first candidate midpoint
2277    
2278      $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree      @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree;
2279  }  }
2280    
2281    
2282    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2283  sub average_to_tips_1_w {  sub average_to_tips_1_w {
2284      my ( $node ) = @_;      my ( $node ) = @_;
2285    
# Line 1939  Line 2297 
2297          }          }
2298          $x_below /= $n_below;          $x_below /= $n_below;
2299      }      }
2300    
2301      my $x = newick_x( $node ) || 0;      my $x = newick_x( $node ) || 0;
2302      my $x_net = $x_below + $x;      my $x_net = $x_below + $x;
2303    
# Line 1946  Line 2305 
2305  }  }
2306    
2307    
2308    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2309  sub average_to_tips_2_w {  sub average_to_tips_2_w {
2310      my ( $dists1, $x_above, $n_above, $anc_node ) = @_;      my ( $dists1, $x_above, $n_above, $anc_node ) = @_;
2311      my ( undef, $n_below, $x, $x_below, $desc_list, $node ) = @$dists1;      my ( undef, $n_below, $x, $x_below, $desc_list, $node ) = @$dists1;
2312    
2313      #  Are we done?  Root is in this node's branch, or "above"?      #  Are we done?  Root is in this node's branch, or "above"?
2314    
2315      # defined( $x_above ) and print STDERR "x_above = $x_above\n";      my @mids = ();
     # print STDERR "x       = $x\n";  
     # print STDERR "x_below = $x_below\n";  
     # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";  
   
2316      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )      if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
2317      {      {
2318          #  At this point the root can only be in this node's branch,          #  At this point the root can only be in this node's branch,
# Line 1964  Line 2320 
2320          #  would mean that the midpoint is actually down a different          #  would mean that the midpoint is actually down a different
2321          #  path from the root of the current tree).          #  path from the root of the current tree).
2322          #          #
2323          #  Is the root in the current branch?          #  Is their a root in the current branch?
2324    
2325          if ( ( $x_below + $x ) >= $x_above )          if ( ( $x_below + $x ) >= $x_above )
2326          {          {
2327              return ( $x_above >= $x_below ) ? $anc_node : $node;              #  We will need to make a new node for the root, $fract of
2328          }              #  the way from $node to $anc_node:
2329          else              my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 )
2330          {                                     : 0.5;
2331              return undef;              push @mids, [ $node, $anc_node, $fract ];
2332          }          }
2333      }      }
2334    
# Line 1992  Line 2348 
2348    
2349          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )          my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 )
2350                                   : 0;                                   : 0;
2351          my $root = average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );          push @mids, average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node );
         if ( $root ) { return $root }  
2352      }      }
2353    
2354      #  Was not anywhere below this node (oh-oh):      return @mids;
   
     return undef;  
2355  }  }
2356    
2357    
# Line 2313  Line 2666 
2666    
2667    
2668  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2669  #  Produce a subtree with the desired tips:  #  Produce a potentially rooted subtree with the desired tips:
2670    #
2671    #     Except for (some) tip nodes, the tree produced is a copy.
2672    #     There is no check that requested tips exist.
2673    #
2674    #  $newtree = rooted_newick_subtree( $tree,  @tips )
2675    #  $newtree = rooted_newick_subtree( $tree, \@tips )
2676    #-------------------------------------------------------------------------------
2677    sub rooted_newick_subtree {
2678        my ( $tr, @tips ) = @_;
2679        if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }
2680    
2681        if ( @tips < 2 ) { return undef }
2682        my $keephash = { map { ( $_, 1 ) } @tips };
2683        my $tr2 = subtree1( $tr, $keephash );
2684        $tr2->[2] = undef if $tr2;                   # undef root branch length
2685        $tr2;
2686    }
2687    
2688    
2689    #-------------------------------------------------------------------------------
2690    #  Produce a subtree with the desired tips:
2691  #  #
2692  #     Except for (some) tip nodes, the tree produced is a copy.  #     Except for (some) tip nodes, the tree produced is a copy.
2693  #     There is no check that requested tips exist.  #     There is no check that requested tips exist.
# Line 2386  Line 2760 
2760  }  }
2761    
2762    
2763    #-------------------------------------------------------------------------------
2764    #  The smallest subtree of rooted tree that includes @tips:
2765    #
2766    #    $node = newick_covering_subtree( $tree,  @tips )
2767    #    $node = newick_covering_subtree( $tree, \@tips )
2768    #-------------------------------------------------------------------------------
2769    
2770    sub newick_covering_subtree {
2771        my $tree = shift;
2772        my %tips = map { $_ => 1 } ( ( ref( $_[0] ) eq 'ARRAY' ) ? @{ $_[0] } : @_ );
2773    
2774        #  Return smallest covering node, if any:
2775    
2776        ( newick_covering_subtree( $tree, \%tips ) )[ 0 ];
2777    }
2778    
2779    
2780    sub newick_covering_subtree_1 {
2781        my ( $node, $tips ) = @_;
2782        my $n_cover = 0;
2783        my @desc = newick_desc_list( $node );
2784        if ( @desc )
2785        {
2786            foreach ( @desc )
2787            {
2788                my ( $subtree, $n ) = newick_covering_subtree_1( $_, $tips );
2789                return ( $subtree, $n ) if $subtree;
2790                $n_cover += $n;
2791            }
2792        }
2793        elsif ( $tips->{ newick_lbl( $node ) } )
2794        {
2795            $n_cover++;
2796        }
2797    
2798        #  If all tips are covered, return node
2799    
2800        ( $n_cover == keys %$tips ) ? ( $node, $n_cover ) : ( undef, $n_cover );
2801    }
2802    
2803    
2804    #===============================================================================
2805    #
2806    #  Representative subtrees
2807    #
2808    #===============================================================================
2809    #  Find subtree of size n representating vicinity of the root:
2810    #
2811    #   $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
2812    #   $subtree = root_neighborhood_representative_tree( $tree, $n )
2813    #
2814    #  Note that if $tree is rooted, then the subtree will also be.  This can have
2815    #  consequences on downstream programs.
2816    #-------------------------------------------------------------------------------
2817    sub root_neighborhood_representative_tree
2818    {
2819        my ( $tree, $n, $tip_priority ) = @_;
2820        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2821        if ( newick_tip_count( $tree ) <= $n ) { return $tree }
2822    
2823        $tip_priority ||= default_tip_priority( $tree );
2824        my @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2825                   root_proximal_newick_subtrees( $tree, $n );
2826    
2827        newick_subtree( copy_newick_tree( $tree ), \@tips );
2828    }
2829    
2830    
2831    #-------------------------------------------------------------------------------
2832    #  Find n tips to represent tree lineages in vicinity of another tip.
2833    #  Default tip priority is short total branch length.
2834    #
2835    #  \@tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2836    #   @tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2837    #  \@tips = root_neighborhood_representative_tips( $tree, $n )
2838    #   @tips = root_neighborhood_representative_tips( $tree, $n )
2839    #-------------------------------------------------------------------------------
2840    sub root_neighborhood_representative_tips
2841    {
2842        my ( $tree, $n, $tip_priority ) = @_;
2843        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2844    
2845        my @tips;
2846        if ( newick_tip_count( $tree ) <= $n )
2847        {
2848            @tips = newick_tip_list( $tree );
2849        }
2850        else
2851        {
2852            $tip_priority ||= default_tip_priority( $tree );
2853            @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2854                    root_proximal_newick_subtrees( $tree, $n );
2855        }
2856    
2857        wantarray ? @tips : \@tips;
2858    }
2859    
2860    
2861    #-------------------------------------------------------------------------------
2862    #  Find subtree of size n representating vicinity of a tip:
2863    #
2864    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
2865    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
2866    #-------------------------------------------------------------------------------
2867    sub tip_neighborhood_representative_tree
2868    {
2869        my ( $tree, $tip, $n, $tip_priority ) = @_;
2870        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2871        newick_tip_in_tree( $tree, $tip ) or return undef;
2872    
2873        my $tree1 = copy_newick_tree( $tree );
2874        if ( newick_tip_count( $tree1 ) - 1 <= $n )
2875        {
2876            return prune_from_newick( $tree1, $tip )
2877        }
2878    
2879        $tree1 = reroot_newick_to_tip( $tree1, $tip );
2880        $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2881        my @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2882        newick_subtree( copy_newick_tree( $tree ), \@tips );
2883    }
2884    
2885    
2886    #-------------------------------------------------------------------------------
2887    #  Find n tips to represent tree lineages in vicinity of another tip.
2888    #  Default tip priority is short total branch length.
2889    #
2890    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2891    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2892    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2893    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2894    #-------------------------------------------------------------------------------
2895    sub tip_neighborhood_representative_tips
2896    {
2897        my ( $tree, $tip, $n, $tip_priority ) = @_;
2898        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2899        newick_tip_in_tree( $tree, $tip ) or return undef;
2900    
2901        my @tips = newick_tip_list( $tree );
2902        if ( newick_tip_count( $tree ) - 1 <= $n )
2903        {
2904            @tips = grep { $_ ne $tip } @tips;
2905        }
2906        else
2907        {
2908            my $tree1 = copy_newick_tree( $tree );
2909            $tree1 = reroot_newick_to_tip( $tree1, $tip );
2910            $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2911            @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2912        }
2913    
2914        wantarray ? @tips : \@tips;
2915    }
2916    
2917    
2918    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2919    #  Anonymous hash of the negative distance from root to each tip:
2920    #
2921    #   \%tip_priority = default_tip_priority( $tree )
2922    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2923    sub default_tip_priority
2924    {
2925        my ( $tree ) = @_;
2926        my $tip_distances = newick_tip_distances( $tree ) || {};
2927        return { map { $_ => -$tip_distances->{$_} } keys %$tip_distances };
2928    }
2929    
2930    
2931    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2932    #  Select a tip from a subtree base on a priority value:
2933    #
2934    #    $tip = representative_tip_of_newick_node( $node, \%tip_priority )
2935    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2936    sub representative_tip_of_newick_node
2937    {
2938        my ( $node, $tip_priority ) = @_;
2939        my ( $tip ) = sort { $b->[1] <=> $a->[1] }   # The best
2940                      map  { [ $_, $tip_priority->{ $_ } ] }
2941                      newick_tip_list( $node );
2942        $tip->[0];                                   # Label from label-priority pair
2943    }
2944    
2945    
2946    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2947    #  Find n subtrees focused around the root of a tree.  Typically each will
2948    #  then be reduced to a single tip to make a representative tree:
2949    #
2950    #   @subtrees = root_proximal_newick_subtrees( $tree, $n )
2951    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2952    sub root_proximal_newick_subtrees
2953    {
2954        my ( $tree, $n ) = @_;
2955        my $node_start_end = newick_branch_intervals( $tree );
2956        n_representative_branches( $n, $node_start_end );
2957    }
2958    
2959    
2960    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2961    #   @node_start_end = newick_branch_intervals( $tree )
2962    #  \@node_start_end = newick_branch_intervals( $tree )
2963    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2964    sub newick_branch_intervals
2965    {
2966        my ( $node, $parent_x ) = @_;
2967        $parent_x ||= 0;
2968        my ( $desc, undef, $dx ) = @$node;
2969        my $x = $parent_x + $dx;
2970        my $interval = [ $node, $parent_x, $desc && @$desc ? $x : 1e100 ];
2971        my @intervals = ( $interval,
2972                          map { &newick_branch_intervals( $_, $x ) } @$desc
2973                        );
2974        return wantarray ? @intervals : \@intervals;
2975    }
2976    
2977    
2978    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2979    #   @ids = n_representative_branches( $n,  @id_start_end )
2980    #   @ids = n_representative_branches( $n, \@id_start_end )
2981    #  \@ids = n_representative_branches( $n,  @id_start_end )
2982    #  \@ids = n_representative_branches( $n, \@id_start_end )
2983    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2984    sub n_representative_branches
2985    {
2986        my $n = shift;
2987        #  Sort intervals by start point:
2988        my @unprocessed = sort { $a->[1] <=> $b->[1] }
2989                          ( @_ == 1 ) ? @{ $_[0] } : @_;
2990        my @active = ();
2991        my ( $interval, $current_point );
2992        foreach $interval ( @unprocessed )
2993        {
2994            $current_point = $interval->[1];
2995            #  Filter out intervals that have ended.  This is N**2 in the number
2996            #  of representatives.  Fixing this would require maintaining a sorted
2997            #  active list.
2998            @active = grep { $_->[2] > $current_point } @active;
2999            push @active, $interval;
3000            last if ( @active >= $n );
3001        }
3002    
3003        my @ids = map { $_->[0] } @active;
3004        return wantarray() ? @ids : \@ids;
3005    }
3006    
3007    
3008    #===============================================================================
3009    #  Random trees
3010    #===============================================================================
3011    #
3012    #   $tree = random_equibranch_tree(  @tips, \%options )
3013    #   $tree = random_equibranch_tree( \@tips, \%options )
3014    #   $tree = random_equibranch_tree(  @tips )
3015    #   $tree = random_equibranch_tree( \@tips )
3016    #
3017    #  Options:
3018    #
3019    #     length => $branch_length   # D = 1
3020    #
3021    #-------------------------------------------------------------------------------
3022    sub random_equibranch_tree
3023    {
3024        my $opts = $_[ 0] && ref $_[ 0] eq 'HASH' ? shift
3025                 : $_[-1] && ref $_[-1] eq 'HASH' ? pop
3026                 :                                  {};
3027        return undef if ! defined $_[0];
3028    
3029        my @tips = ref $_[0] ? @{ $_[0] } : @_;
3030        return undef if @tips < 2;
3031    
3032        my $len = $opts->{ length } ||= 1;
3033    
3034        if ( @tips == 2 )
3035        {
3036            return [ [ map { [ [], $_, $len ] } @tips ], undef, 0 ];
3037        }
3038    
3039        my $tree = [ [ ], undef, 0 ];
3040    
3041        my @links;  # \$anc_dl[i], i.e. a reference to an element in a descendent list
3042    
3043        my $anc_dl = $tree->[0];
3044        foreach my $tip ( splice( @tips, 0, 3 ) )
3045        {
3046            my $node = [ [], $tip, $len ];
3047            push @$anc_dl, $node;
3048            push @links, \$anc_dl->[-1];  #  Ref to the just added descendent list entry
3049        }
3050    
3051        foreach my $tip ( @tips )
3052        {
3053            my $link    = $links[ int( rand( scalar @links ) ) ];
3054            my $newtip  = [ [], $tip, $len ];
3055            my $new_dl  = [ $$link, $newtip ];
3056            my $newnode = [ $new_dl, undef, $len ];
3057            $$link = $newnode;
3058            push @links, \$new_dl->[0], \$new_dl->[1]
3059        }
3060    
3061        return $tree;
3062    }
3063    
3064    
3065    #-------------------------------------------------------------------------------
3066    #
3067    #   $tree = random_ultrametric_tree(  @tips, \%options )
3068    #   $tree = random_ultrametric_tree( \@tips, \%options )
3069    #   $tree = random_ultrametric_tree(  @tips )
3070    #   $tree = random_ultrametric_tree( \@tips )
3071    #
3072    #  Options:
3073    #
3074    #     depth => $root_to_tip_dist   # D = 1
3075    #
3076    #-------------------------------------------------------------------------------
3077    sub random_ultrametric_tree
3078    {
3079        my $opts = $_[ 0] && ref $_[ 0] eq 'HASH' ? shift
3080                 : $_[-1] && ref $_[-1] eq 'HASH' ? pop
3081                 :                                  {};
3082        return undef if ! defined $_[0];
3083    
3084        my @tips = ref $_[0] ? @{ $_[0] } : @_;
3085        return undef if @tips < 2;
3086    
3087        my $d2tip = $opts->{ depth } ||= 1;
3088    
3089        #  Random tip addition order (for rooted tree it matters):
3090    
3091        @tips = sort { rand() <=> 0.5 } @tips;
3092        my $tree = [ [ ], undef, 0 ];
3093    
3094        my $subtree_size = { $tree => 0 };  # total branch length of each subtree
3095    
3096        #  We start with root bifurcation:
3097    
3098        foreach my $tip ( splice( @tips, 0, 2 ) )
3099        {
3100            my $node = [ [], $tip, $d2tip ];
3101            push @{ $tree->[0] }, $node;
3102            $subtree_size->{ $node }  = $d2tip;
3103            $subtree_size->{ $tree } += $d2tip;
3104        }
3105    
3106        #  Add each remaining tip at $pos, measured along the contour length
3107        #  of the tree (with no retracing along branches).
3108    
3109        foreach my $tip ( @tips )
3110        {
3111            my $pos = rand( $subtree_size->{ $tree } );
3112            random_add_to_ultrametric_tree( $tree, $tip, $subtree_size, $pos, $d2tip );
3113        }
3114    
3115        return $tree;
3116    }
3117    
3118    
3119    sub random_add_to_ultrametric_tree
3120    {
3121        my ( $node, $tip, $subtree_size, $pos, $d2tip ) = @_;
3122        $node && $node->[0] && ref $node->[0] eq 'ARRAY' or die "Bad tree node passed to random_add_to_ultrametric_tree().\n";
3123    
3124        # Find the descendent line that it goes in:
3125    
3126        my $i;
3127        my $dl = $node->[0];
3128        my $size0 = $subtree_size->{ $dl->[0] };
3129        if ( $size0 > $pos ) { $i = 0 } else { $i = 1; $pos -= $size0 }
3130        my $desc = $dl->[$i];
3131    
3132        # Does it go within the subtree, or the branch to the subtree?
3133    
3134        my $len;
3135        my $added;
3136        if ( ( $len = $desc->[2] ) <= $pos )
3137        {
3138            $added = random_add_to_ultrametric_tree( $desc, $tip, $subtree_size, $pos - $len, $d2tip - $len );
3139        }
3140        else
3141        {
3142            # If not in subtree, then it goes in the branch to the descendent node
3143            #
3144            #     ----- node  ------------       node
3145            #       ^   /  \       ^             /  \
3146            #       |       \      | pos             \l1
3147            #       |        \     v                  \
3148            #       |      len\ ----------         newnode
3149            #       |          \                     /  \ l2
3150            # d2tip |           \                   /    \
3151            #       |           desc               /     desc
3152            #       |           /  \            l3/      /  \
3153            #       |          .    .            /      .    .
3154            #       v         .      .          /      .      .
3155            #     -----      .        .     newtip    .        .
3156    
3157            my $l1      = $pos;
3158            my $l2      = $len   - $pos;
3159            my $l3      = $d2tip - $pos;
3160            my $newtip  = [ [], $tip, $l3 ];
3161            my $newnode = [ [ $desc, $newtip ], undef, $l1 ];
3162            $dl->[$i]   = $newnode;
3163            $subtree_size->{ $newtip  } = $l3;
3164            $subtree_size->{ $newnode } = $subtree_size->{ $desc } + $l3;
3165            $desc->[2] = $l2;
3166            $subtree_size->{ $desc } -= $l1;
3167            $added = $l3;
3168        }
3169    
3170        #  New branch was inserted below this point:
3171    
3172        $subtree_size->{ $node } += $added;
3173        return $added;
3174    }
3175    
3176    
3177    
3178  #===============================================================================  #===============================================================================
3179  #  #
3180  #  Tree writing and reading  #  Tree writing and reading
# Line 2543  Line 3332 
3332    
3333    
3334  #===============================================================================  #===============================================================================
3335  #  $tree  = read_newick_tree( $file )  # reads to a semicolon  #  Read to a semicolon
3336  #  @trees = read_newick_trees( $file ) # reads to end of file  #
3337    #  $tree  = read_newick_tree( )
3338    #  $tree  = read_newick_tree( \*FH )
3339    #  $tree  = read_newick_tree( $file )
3340    #
3341    #  Read to end of file:
3342    #  @trees = read_newick_trees( )
3343    #  @trees = read_newick_trees( \*FH )
3344    #  @trees = read_newick_trees( $file )
3345  #===============================================================================  #===============================================================================
3346    
3347  sub read_newick_tree  sub read_newick_tree
# Line 2553  Line 3350 
3350      my ( $fh, $close ) = open_input( $file );      my ( $fh, $close ) = open_input( $file );
3351      my $tree;      my $tree;
3352      my @lines = ();      my @lines = ();
3353      while ( defined( $_ = <$fh> ) )      foreach ( <$fh> )
3354      {      {
3355          chomp;          chomp;
3356          push @lines, $_;          push @lines, $_;
# Line 2575  Line 3372 
3372      my ( $fh, $close ) = open_input( $file );      my ( $fh, $close ) = open_input( $file );
3373      my @trees = ();      my @trees = ();
3374      my @lines = ();      my @lines = ();
3375      while ( defined( $_ = <$fh> ) )      foreach ( <$fh> )
3376      {      {
3377          chomp;          chomp;
3378          push @lines, $_;          push @lines, $_;
# Line 2762  Line 3559 
3559      #  Loop while it is a comment:      #  Loop while it is a comment:
3560      while ( substr( $s, $ind, 1 ) eq "[" ) {      while ( substr( $s, $ind, 1 ) eq "[" ) {
3561          $ind++;          $ind++;
3562            my $depth = 1;
3563            my $ind2  = $ind;
3564    
3565          #  Find end          #  Find end
3566          if ( substr( $s, $ind ) !~ /^([^]]*)\]/ ) {          while ( $depth > 0 )
3567            {
3568                if ( substr( $s, $ind2 ) =~ /^([^][]*\[)/ )     # nested [ ... ]
3569                {
3570                    $ind2 += length( $1 );  #  Points at char just past [
3571                    $depth++;               #  If nested comments are allowed
3572                }
3573                elsif ( substr( $s, $ind2 ) =~ /^([^][]*\])/ )  # close bracket
3574                {
3575                    $ind2 += length( $1 );  #  Points at char just past ]
3576                    $depth--;
3577                }
3578                else
3579                {
3580              treeParseError( "comment missing closing bracket '["              treeParseError( "comment missing closing bracket '["
3581                             . substr( $s, $ind ) . "'" )                             . substr( $s, $ind ) . "'" )
3582          }          }
3583          my $comment = $1;          }
3584    
3585          #  Save if it includes any "text"          my $comment = substr( $s, $ind, $ind2-$ind-1 );
3586          if ( $comment =~ m/\S/ ) { push @clist, $comment }          if ( $comment =~ m/\S/ ) { push @clist, $comment }
3587    
3588          $ind += length( $comment ) + 1;     #  Comment plus closing bracket          $ind = $ind2;
3589    
3590          #  Skip white space          #  Skip white space
3591          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }          if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }
# Line 2792  Line 3604 
3604  #===============================================================================  #===============================================================================
3605  #  Make a printer plot of a tree:  #  Make a printer plot of a tree:
3606  #  #
3607  #     $node   newick tree root node  #  printer_plot_newick( $node, $file, $width, $min_dx, $dy )
3608  #     $file   undef (= \*STDOUT), \*STDOUT, \*STDERR, or a file name.  #  printer_plot_newick( $node, $file, \%options )
3609  #     $width  the approximate characters for the tree without labels  #
3610  #     $min_dx the minimum horizontal branch length  #     $node   # newick tree root node
3611  #     $dy     the vertical space per taxon  #     $file   # undef = \*STDOUT, \*FH, or a file name.
3612    #     $width  # the approximate characters for the tree without labels (D = 68)
3613    #     $min_dx # the minimum horizontal branch length (D = 2)
3614    #     $dy     # the vertical space per taxon (D = 1, most compressed)
3615    #
3616    #  Options:
3617    #
3618    #    dy     => nat_number    # the vertical space per taxon
3619    #    chars  => key           # line drawing character set:
3620    #                            #     html_unicode
3621    #                            #     text (default)
3622    #    min_dx => whole_number  # the minimum horizontal branch length
3623    #    width  => whole_number  # approximate tree width without labels
3624  #  #
 #  printer_plot_newick( $node, $file (D=\*STDOUT), $width (D=68), $min_dx (D=2), $dy (D=1) )  
3625  #===============================================================================  #===============================================================================
3626  sub printer_plot_newick {  sub printer_plot_newick
3627      my ( $node, $file, $width, $min_dx, $dy ) = @_;  {
3628        my ( $node, $file, @opts ) = @_;
3629    
3630      my ( $fh, $close ) = open_output( $file );      my ( $fh, $close ) = open_output( $file );
3631      $fh or return;      $fh or return;
3632    
3633      print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";      my $html = $opts[0] && ref($opts[0]) eq 'HASH'
3634                            && $opts[0]->{ chars }
3635                            && $opts[0]->{ chars } =~ /html/;
3636        print $fh '<PRE>' if $html;
3637        print $fh join( "\n", text_plot_newick( $node, @opts ) ), "\n";
3638        print $fh "</PRE>\n" if $html;
3639    
3640      if ( $close ) { close $fh }      if ( $close ) { close $fh }
3641  }  }
3642    
3643    
3644  #===============================================================================  #===============================================================================
3645    #  Character sets for printer plot trees:
3646    #-------------------------------------------------------------------------------
3647    
3648    my %char_set =
3649      ( text1     => { space  => ' ',
3650                       horiz  => '-',
3651                       vert   => '|',
3652                       el_d_r => '/',
3653                       el_u_r => '\\',
3654                       el_d_l => '\\',
3655                       el_u_l => '/',
3656                       tee_l  => '+',
3657                       tee_r  => '+',
3658                       tee_u  => '+',
3659                       tee_d  => '+',
3660                       half_l => '-',
3661                       half_r => '-',
3662                       half_u => '|',
3663                       half_d => '|',
3664                       cross  => '+',
3665                     },
3666        text2     => { space  => ' ',
3667                       horiz  => '-',
3668                       vert   => '|',
3669                       el_d_r => '+',
3670                       el_u_r => '+',
3671                       el_d_l => '+',
3672                       el_u_l => '+',
3673                       tee_l  => '+',
3674                       tee_r  => '+',
3675                       tee_u  => '+',
3676                       tee_d  => '+',
3677                       half_l => '-',
3678                       half_r => '-',
3679                       half_u => '|',
3680                       half_d => '|',
3681                       cross  => '+',
3682                     },
3683        html_box  => { space  => '&nbsp;',
3684                       horiz  => '&#9472;',
3685                       vert   => '&#9474;',
3686                       el_d_r => '&#9484;',
3687                       el_u_r => '&#9492;',
3688                       el_d_l => '&#9488;',
3689                       el_u_l => '&#9496;',
3690                       tee_l  => '&#9508;',
3691                       tee_r  => '&#9500;',
3692                       tee_u  => '&#9524;',
3693                       tee_d  => '&#9516;',
3694                       half_l => '&#9588;',
3695                       half_r => '&#9590;',
3696                       half_u => '&#9589;',
3697                       half_d => '&#9591;',
3698                       cross  => '&#9532;',
3699                     },
3700        utf8_box  => { space  => ' ',
3701                       horiz  => chr(226) . chr(148) . chr(128),
3702                       vert   => chr(226) . chr(148) . chr(130),
3703                       el_d_r => chr(226) . chr(148) . chr(140),
3704                       el_u_r => chr(226) . chr(148) . chr(148),
3705                       el_d_l => chr(226) . chr(148) . chr(144),
3706                       el_u_l => chr(226) . chr(148) . chr(152),
3707                       tee_l  => chr(226) . chr(148) . chr(164),
3708                       tee_r  => chr(226) . chr(148) . chr(156),
3709                       tee_u  => chr(226) . chr(148) . chr(180),
3710                       tee_d  => chr(226) . chr(148) . chr(172),
3711                       half_l => chr(226) . chr(149) . chr(180),
3712                       half_r => chr(226) . chr(149) . chr(182),
3713                       half_u => chr(226) . chr(149) . chr(181),
3714                       half_d => chr(226) . chr(149) . chr(183),
3715                       cross  => chr(226) . chr(148) . chr(188),
3716                     },
3717      );
3718    
3719    %{ $char_set{ html1 } } = %{ $char_set{ text1 } };
3720    $char_set{ html1 }->{ space } = '&nbsp;';
3721    
3722    %{ $char_set{ html2 } } = %{ $char_set{ text2 } };
3723    $char_set{ html2 }->{ space } = '&nbsp;';
3724    
3725    #  Define some synonyms
3726    
3727    $char_set{ html } = $char_set{ html_box };
3728    $char_set{ line } = $char_set{ utf8_box };
3729    $char_set{ symb } = $char_set{ utf8_box };
3730    $char_set{ text } = $char_set{ text1 };
3731    $char_set{ utf8 } = $char_set{ utf8_box };
3732    
3733    #  Define tree formats and synonyms
3734    
3735    my %tree_format =
3736        ( text         => 'text',
3737          tree_tab_lbl => 'tree_tab_lbl',
3738          tree_lbl     => 'tree_lbl',
3739          chrlist_lbl  => 'chrlist_lbl',
3740          raw          => 'chrlist_lbl',
3741        );
3742    
3743    #===============================================================================
3744  #  Make a text plot of a tree:  #  Make a text plot of a tree:
3745  #  #
3746  #     $node   newick tree root node  #  @lines = text_plot_newick( $node, $width, $min_dx, $dy )
3747  #     $width  the approximate characters for the tree without labels  #  @lines = text_plot_newick( $node, \%options )
3748  #     $min_dx the minimum horizontal branch length  #
3749  #     $dy     the vertical space per taxon  #     $node   # newick tree root node
3750    #     $width  # the approximate characters for the tree without labels (D = 68)
3751    #     $min_dx # the minimum horizontal branch length (D = 2)
3752    #     $dy     # the vertical space per taxon (D = 1, most compressed)
3753    #
3754    #  Options:
3755    #
3756    #    chars  => keyword       # the output character set for the tree
3757    #    dy     => nat_number    # the vertical space per taxon
3758    #    format => keyword       # output format of each line
3759    #    min_dx => whole_number  # the minimum horizontal branch length
3760    #    width  => whole_number  # approximate tree width without labels
3761    #
3762    #  Character sets:
3763    #
3764    #    html       #  synonym of html1
3765    #    html_box   #  html encoding of unicode box drawing characters
3766    #    html1      #  text1 with nonbreaking spaces
3767    #    html2      #  text2 with nonbreaking spaces
3768    #    line       #  synonym of utf8_box
3769    #    raw        #  pass out the internal representation
3770    #    symb       #  synonym of utf8_box
3771    #    text       #  synonym of text1 (Default)
3772    #    text1      #  ascii characters: - + | / \ and space
3773    #    text2      #  ascii characters: - + | + + and space
3774    #    utf8       #  synonym of utf8_box
3775    #    utf8_box   #  utf8 encoding of unicode box drawing characters
3776    #
3777    #  Formats for row lines:
3778    #
3779    #    text           #    $textstring              # Default
3780    #    tree_tab_lbl   #    $treestr \t $labelstr
3781    #    tree_lbl       # [  $treestr,  $labelstr ]
3782    #    chrlist_lbl    # [ \@treechar, $labelstr ]   # Forced with raw chars
3783    #    raw            #  synonym of chrlist_lbl
3784  #  #
 #  @textlines = text_plot_newick( $node, $width (D=68), $min_dx (D=2), $dy (D=1) )  
3785  #===============================================================================  #===============================================================================
3786  sub text_plot_newick {  sub text_plot_newick
3787      my ( $node, $width, $min_dx, $dy ) = @_;  {
3788        my $node = shift @_;
3789      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";      array_ref( $node ) || die "Bad node passed to text_plot_newick\n";
3790      defined( $min_dx ) and ( $min_dx >=  0 ) or $min_dx =  2;  
3791      defined(     $dy ) and (     $dy >=  1 ) or     $dy =  1;      my ( $opts, $width, $min_dx, $dy, $chars, $fmt );
3792      defined( $width  )                       or  $width = 68;      if ( $_[0] && ref $_[0] eq 'HASH' )
3793        {
3794            $opts = shift;
3795        }
3796        else
3797        {
3798            ( $width, $min_dx, $dy ) = @_;
3799            $opts = {};
3800        }
3801    
3802        $chars = $opts->{ chars } || '';
3803        my $charH;
3804        $charH = $char_set{ $chars } || $char_set{ 'text1' } if ( $chars ne 'raw' );
3805        my $is_box = $charH eq $char_set{ html_box }
3806                  || $charH eq $char_set{ utf8_box }
3807                  || $chars eq 'raw';
3808    
3809        $fmt = ( $chars eq 'raw' ) ? 'chrlist_lbl' : $opts->{ format };
3810        $fmt = $tree_format{ $fmt || '' } || 'text';
3811    
3812        $dy    ||= $opts->{ dy     } ||  1;
3813        $width ||= $opts->{ width  } || 68;
3814        $min_dx  = $opts->{ min_dx } if ( ! defined $min_dx || $min_dx < 0 );
3815        $min_dx  = $is_box ? 1 : 2   if ( ! defined $min_dx || $min_dx < 0 );
3816    
3817        #  Layout the tree:
3818    
3819      $min_dx = int( $min_dx );      $min_dx = int( $min_dx );
3820      $dy     = int( $dy );      $dy     = int( $dy );
# Line 2835  Line 3823 
3823      my $hash = {};      my $hash = {};
3824      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );      layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );
3825    
3826      # dump_tree_hash( $node, $hash ); exit;      #  Generate the lines of the tree-one by-one:
   
     #  Generate the lines of the tree one by one:  
3827    
3828      my ( $y1, $y2 ) = @{ $hash->{ $node } };      my ( $y1, $y2 ) = @{ $hash->{ $node } };
3829      map { text_tree_row( $node, $hash, $_, "", "+" ) } ( $y1 .. $y2 );      my @lines;
3830        foreach ( ( $y1 .. $y2 ) )
3831        {
3832            my $line = text_tree_row( $node, $hash, $_, [], 'tee_l', $dy >= 2 );
3833            my $lbl  = '';
3834            if ( @$line )
3835            {
3836                if ( $line->[-1] eq '' ) { pop @$line; $lbl = pop @$line }
3837                #  Translate tree characters
3838                @$line = map { $charH->{ $_ } } @$line if $chars ne 'raw';
3839            }
3840    
3841            # Convert to requested output format:
3842    
3843            push @lines, $fmt eq 'text'         ? join( '', @$line, ( $lbl ? " $lbl" : () ) )
3844                       : $fmt eq 'text_tab_lbl' ? join( '', @$line, "\t", $lbl )
3845                       : $fmt eq 'tree_lbl'     ? [ join( '', @$line ), $lbl ]
3846                       : $fmt eq 'chrlist_lbl'  ? [ $line, $lbl ]
3847                       :                          ();
3848  }  }
3849    
3850        # if ( $cells )
3851        # {
3852        #     my $nmax = 0;
3853        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
3854        #     foreach ( @lines )
3855        #     {
3856        #         @$_ = map { "<TD>$_</TD>" } @$_;
3857        #         my $span = $nmax - @$_ + 1;
3858        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
3859        #     }
3860        # }
3861        # elsif ( $tables )
3862        # {
3863        #     my $nmax = 0;
3864        #     foreach ( @lines ) { $nmax = @$_ if @$_ > $nmax }
3865        #     foreach ( @lines )
3866        #     {
3867        #         @$_ = map { "<TD>$_</TD>" } @$_;
3868        #         my $span = $nmax - @$_ + 1;
3869        #         $_->[-1] =~ s/^<TD>/<TD NoWrap ColSpan=$span>/;
3870        #     }
3871        # }
3872    
3873        wantarray ? @lines : \@lines;
3874    }
3875    
3876    
3877  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3878  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy )  #  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd )
3879  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3880  sub layout_printer_plot {  sub layout_printer_plot
3881      my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy ) = @_;  {
3882        my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy, $yrnd ) = @_;
3883      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";      array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";
3884      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";      hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";
3885    
3886      my $dx = newick_x( $node );      my $dx = newick_x( $node );
3887      if ( defined( $dx ) ) {      if ( defined( $dx ) ) {
3888          $dx *= $x_scale;          $dx *= $x_scale;
3889          $dx >= $min_dx or $dx = $min_dx;          $dx = $min_dx if $dx < $min_dx;
3890      }      }
3891      else {      else {
3892          $dx = ( $x0 > 0 ) ? $min_dx : 0;          $dx = ( $x0 > 0 ) ? $min_dx : 0;
# Line 2881  Line 3913 
3913          $ymax = $y0;          $ymax = $y0;
3914    
3915          foreach ( @dl ) {          foreach ( @dl ) {
3916              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy );              ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy,
3917                                                              ( 2*@ylist < @dl ? 0.5001 : 0.4999 )
3918                                                            );
3919              push @ylist, $yi;              push @ylist, $yi;
3920              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }              if ( $xmaxi > $xmax ) { $xmax = $xmaxi }
3921          }          }
# Line 2891  Line 3925 
3925    
3926          $yn1 = $ylist[ 0];          $yn1 = $ylist[ 0];
3927          $yn2 = $ylist[-1];          $yn2 = $ylist[-1];
3928          $y = int( 0.5 * ( $yn1 + $yn2 ) + 0.4999 );          $y   = int( 0.5 * ( $yn1 + $yn2 ) + ( $yrnd || 0.4999 ) );
3929    
3930            #  Handle special case of internal node label. Put it between subtrees.
3931    
3932            if ( ( $dy >= 2 ) && newick_lbl( $node ) && ( @dl > 1 ) ) {
3933                #  Find the descendents $i1 and $i2 to put the branch between
3934                my $i2 = 1;
3935                while ( ( $i2+1 < @ylist ) && ( $ylist[$i2] < $y ) ) { $i2++ }
3936                my $i1 = $i2 - 1;
3937                #  Get bottom of subtree1 and top of subtree2:
3938                my $ymax1 = $hash->{ $dl[ $i1 ] }->[ 1 ];
3939                my $ymin2 = $hash->{ $dl[ $i2 ] }->[ 0 ];
3940                #  Midway between bottom of subtree1 and top of subtree2, with
3941                #  preferred rounding direction
3942                $y = int( 0.5 * ( $ymax1 + $ymin2 ) + ( $yrnd || 0.4999 ) );
3943            }
3944      }      }
3945    
3946      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );
# Line 2901  Line 3950 
3950  }  }
3951    
3952    
3953  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #  What symbol do we get if we add a leftward line to some other symbol?
 #  Debug routine  
 #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
 sub dump_tree {  
     my ( $node, $prefix ) = @_;  
     defined( $prefix ) or $prefix = "";  
     print STDERR $prefix, join(", ", @$node), "\n";  
     my @dl = $node->[0] ? @{$node->[0]} : ();  
     foreach ( @dl ) { dump_tree( $_, $prefix . "  " ) }  
     $prefix or print STDERR "\n";  
 }  
   
   
 #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
 #  Debug routine  
 #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
 sub dump_tree_hash {  
     my ( $node, $hash, $prefix ) = @_;  
     defined( $prefix ) or print STDERR "node; [ y1, y2, x0, x, y, yn1, yn2 ]\n" and $prefix = "";  
     print STDERR $prefix, join(", ", @$node), "; ", join(", ", @{ $hash->{ $node } } ), "\n";  
     my @dl = $node->[0] ? @{$node->[0]} : ();  
     foreach (@dl) { dump_tree_hash( $_, $hash, $prefix . "  " ) }  
 }  
3954    
3955    my %with_left_line = ( space  => 'half_l',
3956                           horiz  => 'horiz',
3957                           vert   => 'tee_l',
3958                           el_d_r => 'tee_d',
3959                           el_u_r => 'tee_u',
3960                           el_d_l => 'el_d_l',
3961                           el_u_l => 'el_u_l',
3962                           tee_l  => 'tee_l',
3963                           tee_r  => 'cross',
3964                           tee_u  => 'tee_u',
3965                           tee_d  => 'tee_d',
3966                           half_l => 'half_l',
3967                           half_r => 'horiz',
3968                           half_u => 'el_u_l',
3969                           half_d => 'el_d_l',
3970                           cross  => 'cross',
3971                         );
3972    
3973  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3974  #  $line = text_tree_row( $node, $hash, $row, $line, $symb )  #  Produce a description of one line of a printer plot tree.
3975    #
3976    #  \@line = text_tree_row( $node, $hash, $row, \@line, $symb, $ilbl )
3977    #
3978    #     \@line is the character descriptions accumulated so far, one per array
3979    #          element, except for a label, which can be any number of characters.
3980    #          Labels are followed by an empty string, so if $line->[-1] eq '',
3981    #          then $line->[-2] is a label. The calling program translates the
3982    #          symbol names to output characters.
3983    #
3984    #     \@node is a newick tree node
3985    #     \%hash contains tree layout information
3986    #      $row  is the row number (y value) that we are building
3987    #      $symb is the plot symbol proposed for the current x and y position
3988    #      $ilbl is true if internal node labels are allowed
3989    #
3990  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3991  sub text_tree_row {  sub text_tree_row
3992      my ( $node, $hash, $row, $line, $symb ) = @_;  {
3993        my ( $node, $hash, $row, $line, $symb, $ilbl ) = @_;
3994    
3995      my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };      my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };
3996      if ( $row < $y1 || $row > $y2 ) { return $line }      if ( $row < $y1 || $row > $y2 ) { return $line }
3997    
3998      if ( length( $line ) < $x0 ) { $line .= " " x ( $x0 - length( $line ) ) }      if ( @$line < $x0 ) { push @$line, ('space') x ( $x0 - @$line ) }
3999    
4000      if ( $row == $y ) {      if ( $row == $y ) {
4001          $line = substr( $line, 0, $x0 ) . $symb . (( $x > $x0 ) ? "-" x ($x - $x0) : "");          while ( @$line > $x0 ) { pop @$line }  # Actually 0-1 times
4002            push @$line, $symb,
4003                         ( ( $x > $x0 ) ? ('horiz') x ($x - $x0) : () );
4004      }      }
4005    
4006      elsif ( $row > $yn1 && $row < $yn2 ) {      elsif ( $row > $yn1 && $row < $yn2 ) {
4007          if ( length( $line ) < $x ) { $line .= " " x ( $x - length( $line ) ) . "|" }          if ( @$line < $x ) { push @$line, ('space') x ( $x - @$line ), 'vert' }
4008          else { substr( $line, $x ) = "|" }          else               { $line->[$x] = 'vert' }
4009      }      }
4010    
4011      my @dl = newick_desc_list( $node );      my @dl = newick_desc_list( $node );
4012    
4013      if ( @dl < 1 ) {      if ( @dl < 1 ) { push @$line, ( newick_lbl( $node ) || '' ), '' }
         $line .= " " . $node->[1];  
     }  
4014    
4015      else {      else {
4016          my @list = map { [ $_, "+" ] } @dl;  #  Print symbol for line          my @list = map { [ $_, 'tee_r' ] } @dl;  # Line to the right
4017          $list[ 0]->[1] = "/";          if ( @list > 1 ) { #  Fix top and bottom sympbols
4018          $list[-1]->[1] = "\\";              $list[ 0]->[1] = 'el_d_r';
4019                $list[-1]->[1] = 'el_u_r';
4020            }
4021            elsif ( @list ) {  # Only one descendent
4022                $list[ 0]->[1] = 'half_r';
4023            }
4024          foreach ( @list ) {          foreach ( @list ) {
4025              my ( $n, $s ) = @$_;              my ( $n, $s ) = @$_;
4026              if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {              if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {
4027                  $line = text_tree_row( $n, $hash, $row, $line, $s );                  $line = text_tree_row( $n, $hash, $row, $line, $s, $ilbl );
4028              }              }
4029           }           }
4030    
4031          if ( $row == $y ) { substr( $line, $x, 1 ) = "+" }          if ( $row == $y ) {
4032                $line->[$x] = ( $line->[$x] eq 'horiz' ) ? 'tee_l'
4033                                                         : $with_left_line{ $line->[$x] };
4034                push( @$line, newick_lbl( $node ), '' ) if $ilbl && newick_lbl( $node );
4035            }
4036      }      }
4037    
4038      return $line;      return $line;
4039  }  }
4040    
4041    
4042    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4043    #  Debug routine
4044    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4045    sub dump_tree {
4046        my ( $node, $prefix ) = @_;
4047        defined( $prefix ) or $prefix = "";
4048        print STDERR $prefix, join(", ", @$node), "\n";
4049        my @dl = $node->[0] ? @{$node->[0]} : ();
4050        foreach ( @dl ) { dump_tree( $_, $prefix . "  " ) }
4051        $prefix or print STDERR "\n";
4052    }
4053    
4054    
4055    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4056    #  Debug routine
4057    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4058    sub dump_tree_hash {
4059        my ( $node, $hash, $prefix ) = @_;
4060        defined( $prefix ) or print STDERR "node; [ y1, y2, x0, x, y, yn1, yn2 ]\n" and $prefix = "";
4061        print STDERR $prefix, join(", ", @$node), "; ", join(", ", @{ $hash->{ $node } } ), "\n";
4062        my @dl = $node->[0] ? @{$node->[0]} : ();
4063        foreach (@dl) { dump_tree_hash( $_, $hash, $prefix . "  " ) }
4064    }
4065    
4066    
4067  #===============================================================================  #===============================================================================
4068  #  Open an input file stream:  #  Open an input file stream:
4069  #  #
# Line 2983  Line 4076 
4076  {  {
4077      my $file = shift;      my $file = shift;
4078      my $fh;      my $fh;
4079      if    ( ! defined( $file ) )     { return ( \*STDIN ) }      if    ( ! defined $file || $file eq '' ) { return ( \*STDIN ) }
4080      elsif ( ref( $file ) eq 'GLOB' ) { return ( $file   ) }      elsif ( ref( $file ) eq 'GLOB' ) { return ( $file   ) }
4081      elsif ( open( $fh, "<$file" ) )  { return ( $fh, 1  ) } # Need to close      elsif ( open( $fh, "<$file" ) )  { return ( $fh, 1  ) } # Need to close
4082    
# Line 3004  Line 4097 
4097  {  {
4098      my $file = shift;      my $file = shift;
4099      my $fh;      my $fh;
4100      if    ( ! defined( $file ) )     { return ( \*STDOUT ) }      if    ( ! defined $file || $file eq '' ) { return ( \*STDOUT ) }
4101      elsif ( ref( $file ) eq 'GLOB' ) { return ( $file    ) }      elsif ( ref( $file ) eq 'GLOB' ) { return ( $file    ) }
4102      elsif ( ( open $fh, ">$file" ) ) { return ( $fh, 1   ) } # Need to close      elsif ( ( open $fh, ">$file" ) ) { return ( $fh, 1   ) } # Need to close
4103    
# Line 3012  Line 4105 
4105      return undef;      return undef;
4106  }  }
4107    
4108    
4109    #===============================================================================
4110    #  Some subroutines copied from gjolists
4111    #===============================================================================
4112    #  Return the common prefix of two lists:
4113    #
4114    #  @common = common_prefix( \@list1, \@list2 )
4115    #-----------------------------------------------------------------------------
4116    sub common_prefix
4117    {
4118        my ($l1, $l2) = @_;
4119        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
4120        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
4121    
4122        my $i = 0;
4123        my $l1_i;
4124        while ( defined( $l1_i = $l1->[$i] ) && $l1_i eq $l2->[$i] ) { $i++ }
4125    
4126        return @$l1[ 0 .. ($i-1) ];  # perl handles negative range
4127    }
4128    
4129    
4130    #-----------------------------------------------------------------------------
4131    #  Return the unique suffixes of each of two lists:
4132    #
4133    #  ( \@suffix1, \@suffix2 ) = unique_suffixes( \@list1, \@list2 )
4134    #-----------------------------------------------------------------------------
4135    sub unique_suffixes
4136    {
4137        my ($l1, $l2) = @_;
4138        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
4139        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
4140    
4141        my $i = 0;
4142        my @l1 = @$l1;
4143        my @l2 = @$l2;
4144        my $l1_i;
4145        while ( defined( $l1_i = $l1[$i] ) && $l1_i eq $l2[$i] ) { $i++ }
4146    
4147        splice @l1, 0, $i;
4148        splice @l2, 0, $i;
4149        return ( \@l1, \@l2 );
4150    }
4151    
4152    
4153    #-------------------------------------------------------------------------------
4154    #  List of values duplicated in a list (stable in order by second occurance):
4155    #
4156    #  @dups = duplicates( @list )
4157    #-------------------------------------------------------------------------------
4158    sub duplicates
4159    {
4160        my %cnt = ();
4161        grep { ++$cnt{$_} == 2 } @_;
4162    }
4163    
4164    
4165    #-------------------------------------------------------------------------------
4166    #  Randomize the order of a list:
4167    #
4168    #  @random = random_order( @list )
4169    #-------------------------------------------------------------------------------
4170    sub random_order
4171    {
4172        my ( $i, $j );
4173        for ( $i = @_ - 1; $i > 0; $i-- )
4174        {
4175            $j = int( ($i+1) * rand() );
4176            ( $_[$i], $_[$j] ) = ( $_[$j], $_[$i] ); # Interchange i and j
4177        }
4178    
4179       @_;
4180    }
4181    
4182    
4183    #-----------------------------------------------------------------------------
4184    #  Intersection of two or more sets:
4185    #
4186    #  @intersection = intersection( \@set1, \@set2, ... )
4187    #-----------------------------------------------------------------------------
4188    sub intersection
4189    {
4190        my $set = shift;
4191        my @intersection = @$set;
4192    
4193        foreach $set ( @_ )
4194        {
4195            my %set = map { $_ => 1 } @$set;
4196            @intersection = grep { exists $set{ $_ } } @intersection;
4197        }
4198    
4199        @intersection;
4200    }
4201    
4202    
4203    #-----------------------------------------------------------------------------
4204    #  Elements in set 1, but not set 2:
4205    #
4206    #  @difference = set_difference( \@set1, \@set2 )
4207    #-----------------------------------------------------------------------------
4208    sub set_difference
4209    {
4210        my ($set1, $set2) = @_;
4211        my %set2 = map { $_ => 1 } @$set2;
4212        grep { ! ( exists $set2{$_} ) } @$set1;
4213    }
4214    
4215    
4216  1;  1;

Legend:
Removed from v.1.8  
changed lines
  Added in v.1.23

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3