[Bio] / FigKernelPackages / gjonewicklib.pm Repository:
ViewVC logotype

View of /FigKernelPackages/gjonewicklib.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Nov 30 08:44:26 2005 UTC (14 years ago) by golsen
Branch: MAIN
Introduce a different tree printing routine to fid_checked.  This required
a new version of align_with_clustal.pl (= align_with_clustal_2.pl) and a
new tree utilities library (gjonewicklib.pm).

The new tree printer does not require deleting the extra lines (they are
not included in the first place).  This would probably allow some other
restructuring of the code in fid_checked, but I did the minimum.

The other reason for the changes (actually the real reason) is that it is
now possible to introduce midpoint rooting to the tree, and to reorder
the taxa so that the tree is MUCH easier to read (aesthetic tree order).

Also I'm changing it to use a second-generation tree from the alignment,
not just show the guide tree.

package gjonewicklib;

#===============================================================================
#  perl functions for dealing with trees
#
#  Usage:
#      use gjonewicklib
#
#
#===============================================================================
#  Tree data structures:
#===============================================================================
#
#  Elements in newick text file are:
#
#     [c1] ( desc_1, desc_2, ... ) [c2] label [c3] : [c4] x [c5]
#
#  Note that:
#  
#     Comment list 1 can exist on any subtree, but its association with
#        tree components can be upset by rerooting
#     Comment list 2 cannot exist without a descendant list
#     Comment list 3 cannot exist without a label
#     Comment lists 4 and 5 cannot exist without a branch length
#
#  Elements in perl representation are:
#
#     $tree = \@rootnode;
#
#     @node = ( \@desc,  #  reference to list of descendants
#                $label, #  node label
#                $x,     #  branch length
#               \@c1,    #  reference to comment list 1
#               \@c2,    #  reference to comment list 2
#               \@c3,    #  reference to comment list 3
#               \@c4,    #  reference to comment list 4
#               \@c5     #  reference to comment list 5
#             )
#
#  At present, no routine tests or enforces the length of the list (a single
#  element list could be a valid internal node).
#
#  All empty lists can be [] or undef
#
#  Putting the comments at the end allows a shorter list nearly all the
#  time, but is different from the prolog representation.
#
#
#===============================================================================
#  Tree data extraction:
#===============================================================================
#
#  $listref  = newick_desc_ref( $noderef )
#  $label    = newick_lbl( $noderef )
#  $x        = newick_x( $noderef )
#  $listref  = newick_c1( $noderef )
#  $listref  = newick_c2( $noderef )
#  $listref  = newick_c3( $noderef )
#  $listref  = newick_c4( $noderef )
#  $listref  = newick_c5( $noderef )
#
#  @desclist = newick_desc_list( $noderef )
#  $n        = newick_n_desc( $noderef )
#  $descref  = newick_desc_i( $noderef, $i )    # 1-based numbering
#  $bool     = newick_is_tip( $noderef )
#
#  set_newick_desc_ref( $noderef, $listref )
#  set_newick_lbl( $noderef, $label )
#  set_newick_x( $noderef, $x )
#  set_newick_c1( $noderef, $listref )
#  set_newick_c2( $noderef, $listref )
#  set_newick_c3( $noderef, $listref )
#  set_newick_c4( $noderef, $listref )
#  set_newick_c5( $noderef, $listref )
#  set_newick_desc_list( $noderef, @desclist )
#  set_newick_desc_i( $noderef1, $i, $noderef2)
#
#  $bool    = newick_is_rooted( $noderef )      # 2 branches from root
#  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root
#  $bool    = newick_is_tip_rooted( $noderef )  # 1 branch from root
#  $bool    = newick_is_bifurcating( $noderef )
#
#  $n       = newick_tip_count( $noderef )
#  @tiprefs = newick_tip_ref_list( $noderef )
#  @tips    = newick_tip_list( $noderef )
#  $tipref  = newick_first_tip_ref( $noderef )
#  $tip     = newick_first_tip( $noderef )
#  @tips    = newick_duplicated_tips( $noderef )
#  $bool    = newick_tip_in_tree( $noderef, $tipname )
#  @tips    = newick_shared_tips( $tree1, $tree2 )
#
#  $length  = newick_tree_length( $noderef )
#  $xmax    = newick_max_X( $noderef )
#  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )
#  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )
#
#-------------------------------------------------------------------------------
#  Paths from root of tree:
#-------------------------------------------------------------------------------
#
#  Path descriptions are of form:
#      ( $node0, $i0, $node1, $i1, $node2, $i2, ..., $nodeN )
#      () is returned upon failure
#
#  @path = path_to_tip( $treenode, $tipname )
#  @path = path_to_named_node( $treenode, $nodename )
#  @path = path_to_node_ref( $treenode, $noderef )
#
#  @path = path_to_node( $node,   $tip1, $tip2, $tip3   )  #  3 tip names
#  @path = path_to_node( $node, [ $tip1, $tip2, $tip3 ] )  #  Array of tips
#  @path = path_to_node( $node,   $tip1                 )  #  Use path_to_tip
#  @path = path_to_node( $node, [ $tip1 ]               )  #  Use path_to_tip
#
#  $distance = newick_path_length( @path )
#  $distance = tip_to_tip_distance( $tree, $tip1, $tip2 )
#  $distance = node_to_node_distance( $tree, $node1, $node2 )
#
#
#===============================================================================
#  Tree manipulations:
#===============================================================================
#
#  $treecopy = copy__newick_tree( $tree )
#
#-------------------------------------------------------------------------------
#  The following modify the existing tree, and passibly any components of that
#  tree that are reached by reference.  If the old version is still needed, copy
#  before modifying.
#-------------------------------------------------------------------------------
#
#  Modify labels:
#
#  $newtree = newick_relabel_nodes( $node, \%new_name )
#  $newtree = newick_relabel_nodes_i( $node, \%new_name )
#  $newtree = newick_relabel_tips( $node, \%new_name )
#  $newtree = newick_relabel_tips_i( $node, \%new_name )
#
#  Modify branches:
#
#  $n_changed = newick_set_undefined_branches( $node, $x )
#  $n_changed = newick_set_all_branches( $node, $x )
#
#  Modify rooting and/or order:
#
#  $nrmtree = normalize_newick_tree( $tree )
#  $revtree = reverse_newick_tree( $tree )
#  $stdtree = std_unrooted_newick( $tree )
#  $newtree = aesthetic_newick_tree( $tree, $direction )
#  $rndtree = random_order_newick_tree( $tree )
#  $newtree = reroot_newick_by_path( @path )
#  $newtree = reroot_newick_to_tip( $tree, $tip )
#  $newtree = reroot_newick_next_to_tip( $tree, $tip )
#  $newtree = reroot_newick_to_node( $tree, @node )
#  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
#  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
#  $newtree = uproot_tip_rooted_newick( $tree )
#  $newtree = uproot_newick( $tree )
#
#  $newtree = prune_from_newick( $tree, $tip )
#  $newtree = newick_subtree( $tree,  @tips )
#  $newtree = newick_subtree( $tree, \@tips )
#
#===============================================================================
#  Tree reading and writing:
#===============================================================================
#
#   writeNewickTree( $tree )
#   writeNewickTree( $tree, $file )
#   writePrettyTree( $tree, $file )
#  fwriteNewickTree( $file, $tree )
#  $treestring = swriteNewickTree( $tree )
#  $treestring = formatNewickTree( $tree )
#  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )
#   printer_plot_newick( $node, $file, $width, $min_dx, $dy )
#
#  $tree = parse_newick_tree_str( $string )
#
#===============================================================================


require Exporter;

our @ISA = qw(Exporter);
our @EXPORT = qw(
        newick_is_rooted
        newick_is_unrooted
        tree_rooted_on_tip
        newick_is_bifurcating
        newick_tip_count
        newick_tip_list
        newick_first_tip
        newick_duplicated_tips
        newick_tip_in_tree
        newick_shared_tips

        newick_tree_length
        newick_max_X
        newick_most_distant_tip_ref
        newick_most_distant_tip_name
        
        std_newick_name

        path_to_tip
        path_to_named_node
        path_to_node_ref
        path_to_node

        newick_path_length
        tip_to_tip_distance
        node_to_node_distance

        copy__newick_tree

        newick_relabel_nodes
        newick_relabel_nodes_i
        newick_relabel_tips
        newick_relabel_tips_i

        newick_set_undefined_branches
        newick_set_all_branches

        normalize_newick_tree
        reverse_newick_tree
        std_unrooted_newick
        aesthetic_newick_tree
        unaesthetic_newick_tree
        random_order_newick_tree

        reroot_newick_by_path
        reroot_newick_to_tip
        reroot_newick_next_to_tip
        reroot_newick_to_node
        reroot_newick_to_node_ref
        reroot_newick_to_approx_midpoint
        uproot_tip_rooted_newick
        uproot_newick

        prune_from_newick
        newick_subtree

        writeNewickTree
        fwriteNewickTree
        strNewickTree
        formatNewickTree
        parse_newick_tree_str

        printer_plot_newick
        text_plot_newick
        );

our @EXPORT_OK = qw(
        newick_desc_ref
        newick_lbl
        newick_x
        newick_c1
        newick_c2
        newick_c3
        newick_c4
        newick_c5
        newick_desc_list
        newick_n_desc
        newick_desc_i
        newick_is_tip
        newick_is_valid

        set_newick_desc_ref
        set_newick_lbl
        set_newick_x
        set_newick_c1
        set_newick_c2
        set_newick_c3
        set_newick_c4
        set_newick_c5

        set_newick_desc_list
        set_newick_desc_i

        add_to_newick_branch
        dump_tree
        );


use gjolists qw(
        common_prefix
        common_and_unique
        unique_suffixes
        
        unique_set
        duplicates
        random_order

        union
        intersection
        set_difference
        );


use strict;


#-------------------------------------------------------------------------------
#  Internally used definitions
#-------------------------------------------------------------------------------

sub array_ref { ref( $_[0] ) eq "ARRAY" }
sub hash_ref  { ref( $_[0] ) eq "HASH"  }


#===============================================================================
#  Extract tree structure values:
#===============================================================================
#
#     $listref = newick_desc_ref( $noderef )
#     $string  = newick_lbl( $noderef )
#     $real    = newick_x( $noderef )
#     $listref = newick_c1( $noderef )
#     $listref = newick_c2( $noderef )
#     $listref = newick_c3( $noderef )
#     $listref = newick_c4( $noderef )
#     $listref = newick_c5( $noderef )
#     @list    = newick_desc_list( $noderef )
#     $int     = newick_n_desc( $noderef )
#     $listref = newick_desc_i( $noderef )
#     $bool    = node_is_tip( $noderef )
#     $bool    = node_is_valid( $noderef )
#
#-------------------------------------------------------------------------------

sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]
sub newick_lbl      { $_[0]->[1] }
sub newick_x        { $_[0]->[2] }
sub newick_c1       { $_[0]->[3] }
sub newick_c2       { $_[0]->[4] }
sub newick_c3       { $_[0]->[5] }
sub newick_c4       { $_[0]->[6] }
sub newick_c5       { $_[0]->[7] }

sub newick_desc_list {
    my $node = $_[0];
    ! array_ref( $node      ) ? undef           :
      array_ref( $node->[0] ) ? @{ $node->[0] } :
                                ()              ;
}

sub newick_n_desc {
    my $node = $_[0];
    ! array_ref( $node      ) ? undef                  :
      array_ref( $node->[0] ) ? scalar @{ $node->[0] } :
                                0                      ;
}

sub newick_desc_i {
    my ( $node, $i ) = @_;
    ! array_ref( $node      ) ? undef              :
      array_ref( $node->[0] ) ? $node->[0]->[$i-1] :
                                undef              ;
}

sub node_is_tip {
    my $node = $_[0];
    ! array_ref( $node      ) ? undef                :  # Not a node ref
      array_ref( $node->[0] ) ? @{ $node->[0] } == 0 :  # Empty descend list?
                                1                    ;  # No descend list
}

sub node_is_valid {      #  An array ref with nonempty descend list or a label
    my $node = $_[0];
    array_ref( $node ) && (  array_ref( $node->[0] ) && @{ $node->[0] }
                          || defined( $node->[1] )
                          )
}


#-------------------------------------------------------------------------------
#  Set tree structure values
#-------------------------------------------------------------------------------

sub set_newick_desc_ref { $_[0]->[0] = $_[1] }
sub set_newick_lbl      { $_[0]->[1] = $_[1] }
sub set_newick_x        { $_[0]->[2] = $_[1] }
sub set_newick_c1       { $_[0]->[3] = $_[1] }
sub set_newick_c2       { $_[0]->[4] = $_[1] }
sub set_newick_c3       { $_[0]->[5] = $_[1] }
sub set_newick_c4       { $_[0]->[6] = $_[1] }
sub set_newick_c5       { $_[0]->[7] = $_[1] }

sub set_newick_desc_list {
    my $node = shift;
    array_ref( $node ) || return;
    if ( array_ref( $node->[0] ) ) { @{ $node->[0] } =   @_   }
    else                           {    $node->[0]   = [ @_ ] }
}

sub set_newick_desc_i {
    my ( $node1, $i, $node2 ) = @_;
    array_ref( $node1 ) && array_ref( $node2 ) || return;
    if ( array_ref( $node1->[0] ) ) { $node1->[0]->[$i-1] =   $node2   }
    else                            { $node1->[0]         = [ $node2 ] }
}


#===============================================================================
#  Some tree property tests:
#===============================================================================
#  Tree is rooted (2 branches at root node)?
#
#  $bool = newick_is_rooted( $node )
#-------------------------------------------------------------------------------
sub newick_is_rooted {
    my $node = $_[0];
    ! array_ref( $node      ) ? undef                :  # Not a node ref
      array_ref( $node->[0] ) ? @{ $node->[0] } == 2 :  # 2 branches
                                0                    ;  # No descend list
}


#-------------------------------------------------------------------------------
#  Tree is unrooted (> 2 branches at root node)?
#
#  $bool = newick_is_unrooted( $node )
#-------------------------------------------------------------------------------
sub newick_is_unrooted {
    my $node = $_[0];
    ! array_ref( $node      ) ? undef                :  # Not a node ref
      array_ref( $node->[0] ) ? @{ $node->[0] } >= 3 :  # Over 2 branches
                                0                    ;  # No descend list
}


#-------------------------------------------------------------------------------
#  Tree is rooted on a tip (1 branch at root node)?
#
#  $bool = newick_is_tip_rooted( $node )
#-------------------------------------------------------------------------------
sub newick_is_tip_rooted {
    my $node = $_[0];
    ! array_ref( $node      ) ? undef                :  # Not a node ref
      array_ref( $node->[0] ) ? @{ $node->[0] } == 1 :  # 1 branch
                                0                    ;  # No descend list
}

#===============================================================================
#  Everything below this point refers to parts of the tree structure using
#  only the routines above.
#===============================================================================
#  Tree is bifurcating?  If so, return number of descendents of root node.
#
#  $n_desc = newick_is_bifurcating( $node )
#-------------------------------------------------------------------------------
sub newick_is_bifurcating {
    my ( $node, $not_root ) = @_;
    if ( ! array_ref( $node ) ) { return undef }    #  Bad arg

    my $n = newick_n_desc( $node );
    $n == 0 && ! $not_root                                             ? 0 :
    $n == 1 &&   $not_root                                             ? 0 :
    $n == 3 &&   $not_root                                             ? 0 :
    $n >  3                                                            ? 0 :
    $n >  2 && ! newick_is_bifurcating( newick_desc_i( $node, 3, 1 ) ) ? 0 :
    $n >  1 && ! newick_is_bifurcating( newick_desc_i( $node, 2, 1 ) ) ? 0 :
    $n >  0 && ! newick_is_bifurcating( newick_desc_i( $node, 1, 1 ) ) ? 0 :
                                                                         $n
}


#-------------------------------------------------------------------------------
#  Number of tips:
#
#  $n = newick_tip_count( $node )
#-------------------------------------------------------------------------------
sub newick_tip_count {
    my ( $node, $not_root ) = @_;

    my $imax = newick_n_desc( $node );
    if ( $imax < 1 ) { return 1 }

    #  Special case for tree rooted on tip

    my $n = ( $imax == 1 && ( ! $not_root ) ) ? 1 : 0;

    foreach ( newick_desc_list( $node ) ) { $n += newick_tip_count( $_, 1 ) }

    $n;
}


#-------------------------------------------------------------------------------
#  List of tip nodes:
#
#  @tips = newick_tip_ref_list( $node )
#-------------------------------------------------------------------------------
sub newick_tip_ref_list {
    my ( $node, $not_root ) = @_;

    my $imax = newick_n_desc( $node );
    if ( $imax < 1 ) { return $node }

    my @list = ();

    #  Tree rooted on tip?
    if ( $imax == 1 && ! $not_root && newick_lbl( $node ) ) { push @list, $node }

    foreach ( newick_desc_list( $node ) ) {
        push @list, newick_tip_ref_list( $_, 1 );
    }

    @list;
}


#-------------------------------------------------------------------------------
#  List of tips:
#
#  @tips = newick_tip_list( $node )
#-------------------------------------------------------------------------------
sub newick_tip_list {
    map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );
}


#-------------------------------------------------------------------------------
#  First tip node in tree:
#
#  $tipref = newick_first_tip_ref( $node )
#-------------------------------------------------------------------------------
sub newick_first_tip_ref {
    my ( $node, $not_root ) = @_;
    valid_node( $node ) || return  undef;

    #  Arrived at tip, or start of a tip-rooted tree?
    my $n = newick_n_desc( $node );
    if ( ( $n < 1 ) || ( $n == 1 && ! $not_root ) ) { return $node }

    newick_first_tip_ref( newick_desc_i( $node, 1 ), 1 );
}


#-------------------------------------------------------------------------------
#  First tip name in tree:
#
#  $tip = newick_first_tip( $node )
#-------------------------------------------------------------------------------
sub newick_first_tip {
    my ( $noderef ) = @_;

    my $tipref;
    array_ref( $tipref = newick_first_tip_ref( $noderef ) ) ? newick_lbl( $tipref )
                                                          : undef;
}


#-------------------------------------------------------------------------------
#  List of duplicated tip labels.
#
#  @tips = newick_duplicated_tips( $node )
#-------------------------------------------------------------------------------
sub newick_duplicated_tips {
    duplicates( newick_tip_list( $_[0] ) );
}


#-------------------------------------------------------------------------------
#  Tip in tree?
#
#  $bool = newick_tip_in_tree( $node, $tipname )
#-------------------------------------------------------------------------------
sub newick_tip_in_tree {
    my ( $node, $tip, $not_root ) = @_;

    my $n = newick_n_desc( $node );
    if ( $n < 1 ) { return ( newick_lbl( $node ) eq $tip) ? 1 : 0 }

    #  Special case for tree rooted on tip

    if ( $n == 1 && ( ! $not_root ) && newick_lbl( $node ) eq $tip ) { return 1 }

    foreach ( newick_desc_list( $node ) ) {
        if ( newick_tip_in_tree( $_, $tip, 1 ) ) { return 1 }
    }

    0;    #  Fall through means not found
}


#-------------------------------------------------------------------------------
#  Tips shared between 2 trees.
#
#  @tips = newick_shared_tips( $tree1, $tree2 )
#-------------------------------------------------------------------------------
sub newick_shared_tips {
    my ( $Tree1, $Tree2 ) = @_;
    my ( @Tips1 ) = newick_tip_list( $Tree1 );
    my ( @Tips2 ) = newick_tip_list( $Tree2 );
    intersection( \@Tips1, \@Tips2 );
}


#-------------------------------------------------------------------------------
#  Tree length.
#
#  $length = newick_tree_length( $node )
#-------------------------------------------------------------------------------
sub newick_tree_length {
    my ( $node, $not_root ) = @_;

    my $x = $not_root ? newick_x( $node ) : 0;
    defined( $x ) || ( $x = 1 );                #  Convert undefined to 1

    foreach ( newick_desc_list( $node ) ) { $x += newick_tree_length( $_, 1 ) }

    $x;
}


#-------------------------------------------------------------------------------
#  Tree max X.
#
#  $xmax = newick_max_X( $node )
#-------------------------------------------------------------------------------
sub newick_max_X {
    my ( $node, $not_root ) = @_;

    my $xmax = 0;
    foreach ( newick_desc_list( $node ) ) {
        my $x = newick_max_X( $_, 1 );
        if ( $x > $xmax ) { $xmax = $x }
    }

    my $x = $not_root ? newick_x( $node ) : 0;
    $xmax + ( defined( $x ) ? $x : 1 );           #  Convert undefined to 1
}


#-------------------------------------------------------------------------------
#  Most distant tip from root: distance and path.
#
#  ( $xmax, @path ) = newick_most_distant_tip_path( $tree )
#-------------------------------------------------------------------------------
sub newick_most_distant_tip_path {
    my ( $node, $not_root ) = @_;

    my $imax = newick_n_desc( $node );
    my $xmax = ( $imax > 0 ) ? -1 : 0;
    my @pmax = ();
    for ( my $i = 1; $i <= $imax; $i++ ) {
        my ( $x, @path ) = newick_most_distant_tip_path( newick_desc_i( $node, $i ), 1 );
        if ( $x > $xmax ) { $xmax = $x; @pmax = ( $i, @path ) }
    }

    my $x = $not_root ? newick_x( $node ) : 0;
    $xmax += defined( $x ) ? $x : 0;            #  Convert undefined to 1
    ( $xmax, $node, @pmax );
}


#-------------------------------------------------------------------------------
#  Most distant tip from root, and its distance.
#
#  ( $tipref, $xmax ) = newick_most_distant_tip_ref( $tree )
#-------------------------------------------------------------------------------
sub newick_most_distant_tip_ref {
    my ( $node, $not_root ) = @_;

    my $imax = newick_n_desc( $node );
    my $xmax = ( $imax > 0 ) ? -1 : 0;
    my $tmax = $node;
    foreach ( newick_desc_list( $node ) ) {
        my ( $t, $x ) = newick_most_distant_tip_ref( $_, 1 );
        if ( $x > $xmax ) { $xmax = $x; $tmax = $t }
    }

    my $x = $not_root ? newick_x( $node ) : 0;
    $xmax += defined( $x ) ? $x : 1;            #  Convert undefined to 1
    ( $tmax, $xmax );
}


#-------------------------------------------------------------------------------
#  Name of most distant tip from root, and its distance.
#
#  ( $tipname, $xmax ) = newick_most_distant_tip_name( $tree )
#-------------------------------------------------------------------------------
sub newick_most_distant_tip_name {
    my ( $tipref, $xmax ) = newick_most_distant_tip_ref( $_[0] );
    ( newick_lbl( $tipref ), $xmax )
}


#-------------------------------------------------------------------------------
#  Standard node name:
#     Tip label if at a tip
#     Three sorted tip labels intersecting at node, each being smallest
#           of all the tips of their subtrees
#
#  @TipOrTips = std_node_name( $Tree, $Node )
#-------------------------------------------------------------------------------
sub std_node_name {
    my $tree = $_[0];

    #  Node reference is last element of path to node

    my $noderef = ( path_to_node( @_ ) )[-1];
    defined( $noderef ) || return ();

    if ( node_is_tip( $noderef ) || $noderef eq $tree ) {  # Is it a tip?
        return newick_lbl( $noderef );
    }

    #  Work through lists of tips in descendant subtrees, removing them from
    #  @rest, and keeping the best tip for each subtree.

    my @rest = tips_in_newick( $tree );
    my @best = map {
            my @tips = sort { lc $a cmp lc $b } tips_in_newick( $_ );
            @rest = set_difference( \@rest, \@tips );
            $tips[0];
        } newick_desc_list( $noderef );

    # Best of the rest of the tree
    push @best, ( sort { lc $a cmp lc $b } @rest )[0];

    # Take the top 3, in order:

    ( @best >= 3 ) ? ( sort { lc $a cmp lc $b } @best )[0 .. 2] : ();
}


#===============================================================================
#  Functions to find paths in trees.
#
#  Path descriptions are of form:
#      ( $node0, $i0, $node1, $i1, $node2, $i2, ..., $nodeN )   # Always odd
#      () is returned upon failure
#
#  Numbering of descendants is 1-based.
#===============================================================================
#  Path to tip:
#
#  @path = path_to_tip( $treenode, $tipname )
#-------------------------------------------------------------------------------
sub path_to_tip {
    my ( $node, $tip, @path0 ) = @_;

    push @path0, $node;
    my $imax = newick_n_desc( $node );
    if ( $imax < 1 ) { return ( newick_lbl( $node ) eq $tip ) ? @path0 : () }

    #  Special case for tree rooted on tip

    if ( ( $imax  == 1 )                  #  One descendant
      && ( @path0 == 1 )                  #  First step in path
      && ( newick_lbl( $node ) eq $tip )  #  Label matches
       ) { return @path0 }

    my @path;
    for (my $i = 1; $i <= $imax; $i++ ) {
       @path = path_to_tip( newick_desc_i( $node, $i ), $tip, ( @path0, $i ) );
       if ( @path ) { return @path }
    }

    ();  #  Not found
}


#-------------------------------------------------------------------------------
#  Path to named node.
#  Like path to tip, but will find named internal nodes as well.
#
#  @path = path_to_named_node( $treenode, $name )
#-------------------------------------------------------------------------------
sub path_to_named_node {
    my ( $node, $name, @path0 ) = @_;

    push @path0, $node;
    if ( newick_lbl( $node ) eq $name ) { return @path0 }

    my @path;
    my $imax = newick_n_desc( $node );
    for ( my $i = 1; $i <= $imax; $i++ ) {
       @path = path_to_named_node( newick_desc_i( $node, $i ), $name, ( @path0, $i ) );
       if ( @path ) { return @path }
    }

    ();  #  Not found
}


#-------------------------------------------------------------------------------
#  Path to node reference.
#
#  @path = path_to_node_ref( $treenode, $noderef )
#-------------------------------------------------------------------------------
sub path_to_node_ref {
    my ( $node, $noderef, @path0 ) = @_;

    push @path0, $node;
    if ( $node eq $noderef ) { return @path0 }

    my @path;
    my $imax = newick_n_desc( $node );
    for ( my $i = 1; $i <= $imax; $i++ ) {
       @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) );
       if ( @path ) { return @path }
    }

    ();  #  Not found
}


#-------------------------------------------------------------------------------
#  Path to node, as defined by 1 or 3 tips.
#
#  @path = path_to_node( $node,   $tip1, $tip2, $tip3   )  #  3 tip names
#  @path = path_to_node( $node, [ $tip1, $tip2, $tip3 ] )  #  Allow array ref
#  @path = path_to_node( $node,   $tip1                 )  #  Use path_to_tip
#  @path = path_to_node( $node, [ $tip1 ]               )  #  Use path_to_tip
#-------------------------------------------------------------------------------
sub path_to_node {
    my ( $node, $tip1, $tip2, $tip3 ) = @_;
    array_ref( $node ) && defined( $tip1 ) || return ();

    # Allow arg 2 to be an array reference
    if ( array_ref( $tip1 ) ) { ( $tip1, $tip2, $tip3 ) = @$tip1 }

    my @p1 = path_to_tip( $node, $tip1 );                #  Path to first tip
    @p1 || return ();                                    #  Was the tip found?
    defined( $tip2 ) && defined( $tip3 ) || return @p1;  #  Two more defined?

    my @p2 = path_to_tip( $node, $tip2 );
    my @p3 = path_to_tip( $node, $tip3 );
    @p2 && @p3 || return ();                             #  Were they found?

    # Find the common prefix for each pair of paths
    my @p12 = common_prefix( \@p1, \@p2 );
    my @p13 = common_prefix( \@p1, \@p3 );
    my @p23 = common_prefix( \@p2, \@p3 );

    # Return the longest common prefix of any two paths
    ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :
    ( @p13 >= @p23 )                 ? @p13 :
                                       @p23 ;
}


#-------------------------------------------------------------------------------
#  Distance along path.
#
#  $distance = newick_path_length( @path )
#-------------------------------------------------------------------------------
sub newick_path_length {
    my $node = shift;      #  Discard the first node
    array_ref( $node ) || return undef;
    @_ ? distance_along_path_2( @_ ) : 0;
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  This expects to get path minus root node:
#
#  $distance = distance_along_path_2( @path )
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub distance_along_path_2 {
    shift;                 #  Discard descendant number
    my $node = shift;
    array_ref( $node ) || return undef;
    my $d1 = newick_x( $node );
    my $d2 = @_ ? distance_along_path_2(@_) : 0;
    defined( $d1 ) && defined( $d2 ) ? $d1 + $d2 : undef;
}


#-------------------------------------------------------------------------------
#  Tip-to-tip distance.
#
#  $distance = tip_to_tip_distance( $tree, $tip1, $tip2 )
#-------------------------------------------------------------------------------
sub tip_to_tip_distance {
    my ( $node, $tip1, $tip2 ) = @_;

    array_ref( $node ) && defined( $tip1 )
                       && defined( $tip2 ) || return undef;
    my @p1 = path_to_tip( $node, $tip1 );
    my @p2 = path_to_tip( $node, $tip2 );
    @p1 && @p2 || return undef;                          # Were they found?

    # Find the unique suffixes of the two paths
    my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost
    my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
    my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;

    defined( $d1 ) && defined( $d2 ) ? $d1 + $d2 : undef;
}


#-------------------------------------------------------------------------------
#  Node-to-node distance.
#  Nodes can be:   $tipname
#                [ $tipname ]
#                [ $tipname1, $tipname2, $tipname3 ]
#
#  $distance = node_to_node_distance( $tree, $node1, $node2 )
#-------------------------------------------------------------------------------
sub node_to_node_distance {
    my ( $node, $node1, $node2 ) = @_;

    array_ref( $node ) && defined( $node1 )
                       && defined( $node2 ) || return undef;
    my @p1 = path_to_node( $node, $node1 );
    my @p2 = path_to_node( $node, $node2 );
    @p1 && @p2 || return undef;                          # Were they found?

    # Find the unique suffixes of the two paths
    my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost
    my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
    my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;

    defined( $d1 ) && defined( $d2 ) ? $d1 + $d2 : undef;
}


#===============================================================================
#  Tree manipulations:
#===============================================================================
#  Copy tree.
#  Lists are copied, except that references to empty lists go to undef.
#  Only defined fields are added, so tree list may be shorter than 8 fields.
#
#  $treecopy = copy__newick_tree( $tree )
#-------------------------------------------------------------------------------
sub copy__newick_tree {
    my ( $node ) = @_;
    array_ref( $node ) || return undef;

    my $nn = [];  #  Reference to a new node structure
    #  Build a new descendant list, if not empty
    my @dl = newick_desc_list( $node );
    set_newick_desc_ref( $nn, @dl ? [ map { copy__newick_tree( $_ ) } @dl ]
                                  : undef
                       );

    #  Copy label and x, if defined
    my ( $l, $x );
    if ( defined( $l = newick_lbl( $node ) ) ) { set_newick_lbl( $nn, $l ) }
    if ( defined( $x = newick_x(   $node ) ) ) { set_newick_x(   $nn, $x ) }

    #  Build new comment lists, when not empty ( does not extend array unless
    #  necessary)
    my $c;
    if ( $c = newick_c1( $node ) and @$c ) { set_newick_c1( $nn, [ @$c ] ) }
    if ( $c = newick_c2( $node ) and @$c ) { set_newick_c2( $nn, [ @$c ] ) }
    if ( $c = newick_c3( $node ) and @$c ) { set_newick_c3( $nn, [ @$c ] ) }
    if ( $c = newick_c4( $node ) and @$c ) { set_newick_c4( $nn, [ @$c ] ) }
    if ( $c = newick_c5( $node ) and @$c ) { set_newick_c5( $nn, [ @$c ] ) }

    $nn;
}


#-------------------------------------------------------------------------------
#  Use a hash to relabel the nodes in a newick tree.
#
#  $newtree = newick_relabel_nodes( $node, \%new_name )
#-------------------------------------------------------------------------------
sub newick_relabel_nodes {
    my ( $node, $new_name ) = @_;

    my ( $lbl, $new );
    if ( defined( $lbl = newick_lbl( $node ) )
      && ( $lbl ne "" )
      && defined( $new = $new_name->{ $lbl } )
       ) {
        set_newick_lbl( $node, $new );
    }

    foreach ( newick_desc_list( $node ) ) {
        newick_relabel_nodes( $_, $new_name );
    }

    $node;
}


#-------------------------------------------------------------------------------
#  Use a hash to relabel the nodes in a newick tree (case insensitive).
#
#  $newtree = newick_relabel_nodes_i( $node, \%new_name )
#-------------------------------------------------------------------------------
sub newick_relabel_nodes_i {
    my ( $node, $new_name ) = @_;

    #  Add any necessary lowercase keys to the hash:

    my $lc_lbl;
    foreach ( keys %$new_name ) {
        $lc_lbl = lc $_;
        ( $lc_lbl eq $_ ) or ( $new_name->{ $lc_lbl } = $new_name->{ $_ } );
    }

    newick_relabel_nodes_i2( $node, $new_name );
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  Do the actual relabeling
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub newick_relabel_nodes_i2 {
    my ( $node, $new_name ) = @_;

    my ( $lbl, $new );
    if ( defined( $lbl = newick_lbl( $node ) )
      && ( $lbl ne "" )
      && defined( $new = $new_name->{ lc $lbl } )
       ) {
        set_newick_lbl( $node, $new );
    }

    foreach ( newick_desc_list( $node ) ) {
        newick_relabel_nodes_i2( $_, $new_name );
    }

    $node;
}


#-------------------------------------------------------------------------------
#  Use a hash to relabel the tips in a newick tree.
#
#  $newtree = newick_relabel_tips( $node, \%new_name )
#-------------------------------------------------------------------------------
sub newick_relabel_tips {
    my ( $node, $new_name ) = @_;

    my @desc = newick_desc_list( $node );

    if ( @desc ) {
        foreach ( @desc ) { newick_relabel_tips( $_, $new_name ) }
    }
    else {
        my ( $lbl, $new );
        if ( defined( $lbl = newick_lbl( $node ) )
          && ( $lbl ne "" )
          && defined( $new = $new_name->{ $lbl } )
           ) {
            set_newick_lbl( $node, $new );
        }
    }

    $node;
}


#-------------------------------------------------------------------------------
#  Use a hash to relabel the tips in a newick tree (case insensitive).
#
#  $newtree = newick_relabel_tips_i( $node, \%new_name )
#-------------------------------------------------------------------------------
sub newick_relabel_tips_i {
    my ( $node, $new_name ) = @_;

    #  Add any necessary lowercase keys to the hash:

    my $lc_lbl;
    foreach ( keys %$new_name ) {
        $lc_lbl = lc $_;
        ( $lc_lbl eq $_ ) or ( $new_name->{ $lc_lbl } = $new_name->{ $_ } );
    }

    newick_relabel_tips_i2( $node, $new_name );
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  Do the actual relabeling
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub newick_relabel_tips_i2 {
    my ( $node, $new_name ) = @_;

    my @desc = newick_desc_list( $node );

    if ( @desc ) {
        foreach ( @desc ) { newick_relabel_tips_i2( $_, $new_name ) }
    }
    else {
        my ( $lbl, $new );
        if ( defined( $lbl = newick_lbl( $node ) )
          && ( $lbl ne "" )
          && defined( $new = $new_name->{ lc $lbl } )
           ) {
            set_newick_lbl( $node, $new );
        }
    }

    $node;
}


#-------------------------------------------------------------------------------
#  Set undefined branch lenghts (except root) to length x.
#
#  $n_changed = newick_set_undefined_branches( $node, $x )
#-------------------------------------------------------------------------------
sub newick_set_undefined_branches {
    my ( $node, $x, $not_root ) = @_;

    my $n = 0;
    if ( $not_root && ! defined( newick_x( $node ) ) ) {
        set_newick_x( $node, $x );
        $n++;
    }

    foreach ( newick_desc_list( $node ) ) {
        $n += newick_set_undefined_branches( $_, $x, 1 );
    }

    $n;
}


#-------------------------------------------------------------------------------
#  Set all branch lenghts (except root) to length x.
#
#  $n_changed = newick_set_all_branches( $node, $x )
#-------------------------------------------------------------------------------
sub newick_set_all_branches {
    my ( $node, $x, $not_root ) = @_;

    my $n = 0;
    if ( $not_root ) {
        set_newick_x( $node, $x );
        $n++;
    }

    foreach ( newick_desc_list( $node ) ) {
        $n += newick_set_all_branches( $_, $x, 1 );
    }

    $n;
}


#-------------------------------------------------------------------------------
#  Normalize tree order (in place).
#
#  ( $tree, $label1 ) = normalize_newick_tree( $tree )
#-------------------------------------------------------------------------------
sub normalize_newick_tree {
    my ( $node ) = @_;

    my @descends = newick_desc_list( $node );
    if ( @descends == 0 ) { return ( $node, lc newick_lbl( $node ) ) }

    my %hash = map { (normalize_newick_tree( $_ ))[1] => $_ } @descends;
    my @keylist = sort { $a cmp $b } keys %hash;
    set_newick_desc_list( $node, map { $hash{$_} } @keylist );

    ( $node, $keylist[0] );
}


#-------------------------------------------------------------------------------
#  Reverse tree order (in place).
#
#  $tree = reverse_newick_tree( $tree )
#-------------------------------------------------------------------------------
sub reverse_newick_tree {
    my ( $node ) = @_;

    my @descends = newick_desc_list( $node );
    if ( @descends ) {
        set_newick_desc_list( $node, reverse @descends );
        foreach ( @descends ) { reverse_newick_tree( $_ ) }
    }
    $node;
}


#-------------------------------------------------------------------------------
#  Standard unrooted tree (in place).
#
#  $stdtree = std_unrooted_newick( $tree )
#-------------------------------------------------------------------------------
sub std_unrooted_newick {
    my ( $tree ) = @_;

    my ( $mintip ) = sort { lc $a cmp lc $b } newick_tip_list( $tree );
    ( normalize_newick_tree( reroot_newick_next_to_tip( $tree, $mintip ) ) )[0];
}


#-------------------------------------------------------------------------------
#  Move largest groups to periphery of tree (in place).
#
#      dir  <= -2 for up-sweeping tree (big groups always first),
#            = -1 for big group first, balanced tree,
#            =  0 for balanced tree,
#            =  1 for small group first, balanced tree, and
#           >=  2 for down-sweeping tree (small groups always top)
#
#  $tree = aesthetic_newick_tree( $treeref, $dir )
#-------------------------------------------------------------------------------
sub aesthetic_newick_tree {
    my ( $tree, $dir ) = @_;
    my %cnt;

    $dir = ! $dir       ?        0 :  #  Undefined or zero
             $dir <= -2 ? -1000000 :
             $dir <   0 ?       -1 :
             $dir >=  2 ?  1000000 :
                                 1 ;
    build_tip_count_hash( $tree, \%cnt );
    reorder_by_tip_count( $tree, \%cnt, $dir );
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  Build a hash to look up the number of descendants of each node.
#  Access count with $cntref->{$noderef}
#
#  $count = build_tip_count_hash( $node, $cnt_hash_ref )
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub build_tip_count_hash {
    my ( $node, $cntref ) = @_;
    my ( $i, $cnt );

    if ( newick_n_desc( $node ) < 1 ) { $cnt = 1 }
    else {
        $cnt = 0;
        foreach ( newick_desc_list( $node ) ) {
            $cnt += build_tip_count_hash( $_, $cntref );
        }
    }

    $cntref->{$node} = $cnt;
    $cnt;
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  $node = reorder_by_tip_count( $node, $cntref, $dir )
#      dir  < 0 for upward branch (big group first),
#           = 0 for no change, and
#           > 0 for downward branch (small group first).
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub reorder_by_tip_count {
    my ( $node, $cntref, $dir ) = @_;

    my $nd = newick_n_desc( $node );
    if ( $nd <  1 ) { return $node }       #  Do nothing to a tip

    #  Reorder this subtree:

    my $dl_ref = newick_desc_ref( $node );
    if    ( $dir < 0 ) {                   #  Big group first
        @$dl_ref = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;
    }
    elsif ( $dir > 0 ) {                   #  Small group first
        @$dl_ref = sort { $cntref->{$a} <=> $cntref->{$b} } @$dl_ref;
    }

    #  Reorder within descendant subtrees:

    my $step = 0;
    if ( abs( $dir ) < 1e5 ) {
        $dir = 1 - $nd;                              #  Midgroup => as is
    #   $dir = 1 - $nd + ( $dir < 0 ? -0.5 : 0.5 );  #  Midgroup => outward
        $step = 2;
    }

    for ( my $i = 0; $i < $nd; $i++ ) {
        reorder_by_tip_count( $dl_ref->[$i], $cntref, $dir );
        $dir += $step;
    }

    $node;
}


#-------------------------------------------------------------------------------
#  Move smallest groups to periphery of tree (in place).
#
#      dir  <= -2 for up-sweeping tree (big groups always first),
#            = -1 for big group first, balanced tree,
#            =  0 for balanced tree,
#            =  1 for small group first, balanced tree, and
#           >=  2 for down-sweeping tree (small groups always top)
#
#  $tree = unaesthetic_newick_tree( $treeref, $dir )
#-------------------------------------------------------------------------------
sub unaesthetic_newick_tree {
    my ( $tree, $dir ) = @_;
    my %cnt;

    $dir = ! $dir       ?        0 :  #  Undefined or zero
             $dir <= -2 ? -1000000 :
             $dir <   0 ?       -1 :
             $dir >=  2 ?  1000000 :
                                 1 ;
    build_tip_count_hash( $tree, \%cnt );
    reorder_against_tip_count( $tree, \%cnt, $dir );
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  $node = reorder_by_tip_count( $node, $cntref, $dir )
#      dir  < 0 for upward branch (big group first),
#           = 0 for no change, and
#           > 0 for downward branch (small group first).
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub reorder_against_tip_count {
    my ( $node, $cntref, $dir ) = @_;

    my $nd = newick_n_desc( $node );
    if ( $nd <  1 ) { return $node }       #  Do nothing to a tip

    #  Reorder this subtree:

    my $dl_ref = newick_desc_ref( $node );
    if    ( $dir > 0 ) {                   #  Big group first
        @$dl_ref = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;
    }
    elsif ( $dir < 0 ) {                   #  Small group first
        @$dl_ref = sort { $cntref->{$a} <=> $cntref->{$b} } @$dl_ref;
    }

    #  Reorder within descendant subtrees:

    my $step = 0;
    if (abs( $dir ) < 1e5) {
        $dir = 1 - $nd;                              #  Midgroup => as is
    #   $dir = 1 - $nd + ( $dir < 0 ? -0.5 : 0.5 );  #  Midgroup => outward
        $step = 2;
    }

    for ( my $i = 0; $i < $nd; $i++ ) {
        reorder_by_tip_count( $dl_ref->[$i], $cntref, $dir );
        $dir += $step;
    }

    $node;
}


#-------------------------------------------------------------------------------
#  Randomize descendant order at each node (in place).
#
#  $tree = random_order_newick_tree( $tree )
#-------------------------------------------------------------------------------
sub random_order_newick_tree {
    my ( $node ) = @_;

    my $nd = newick_n_desc( $node );
    if ( $nd <  1 ) { return $node }       #  Do nothing to a tip

    #  Reorder this subtree:

    my $dl_ref = newick_desc_ref( $node );
    @$dl_ref = random_order( @$dl_ref );

    #  Reorder descendants:

    foreach ( @$dl_ref ) { random_order_newick_tree( $_ ) }

    $node;
}


#-------------------------------------------------------------------------------
#  Reroot a tree to the node that lies at the end of a path.
#
#  $newtree = reroot_newick_by_path( @path )
#-------------------------------------------------------------------------------
sub reroot_newick_by_path {
    my ( $node1, $path1, @rest ) = @_;
    array_ref( $node1 ) || return undef;      #  Always expect a node

    defined( $path1 ) && @rest || return $node1;  #  If no path, we're done

    my $node2 = $rest[0];                     #  Next element in path is node 2
    newick_desc_i( $node1, $path1 ) eq $node2 || return undef;  #  Check link

    #  Remove node 2 from node 1 descendant list.  Could use a simple splice:
    #
    #      splice( @$dl1, $path1-1, 1 );
    #
    #  But this maintains the cyclic order of the nodes:

    my $dl1 = newick_desc_ref( $node1 );
    my $nd1 = @$dl1;
    if    ( $path1 == 1    ) { shift @$dl1 }
    elsif ( $path1 == $nd1 ) { pop   @$dl1 }
    else                     { @$dl1 = ( @$dl1[ $path1 .. $nd1-1   ]
                                       , @$dl1[ 0      .. $path1-2 ]
                                       )
    }

    #  Append node 1 to node 2 descendant list (does not alter numbering):

    my $dl2 = newick_desc_ref( $node2 );
    if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }
    else                     { set_newick_desc_list( $node2, [ $node1 ] ) }

    #  Move c1 comments from node 1 to node 2:

    my $C11 = newick_c1( $node1 );
    my $C12 = newick_c1( $node2 );
    ! defined( $C11 ) || set_newick_c1( $node1, undef ); #  Remove them from node 1
    if ( $C12 && @$C12 ) {                      #  If node 2 comments and
        if ( $C11 && @$C11 ) { unshift @$C12, @$C11 } #  Node 1, prefix 1 to 2
    }
    elsif ( $C11 && @$C11 ) { set_newick_c1( $node2, $C11 ) }     #  Otherwise move node 1 link

    #  Swap branch lengths and comments for reversal of link direction:

    my $x1 = newick_x( $node1 );
    my $x2 = newick_x( $node2 );
    ! defined( $x1 ) && ! defined ( $x2 ) || set_newick_x( $node1, $x2 );
    ! defined( $x1 ) && ! defined ( $x2 ) || set_newick_x( $node2, $x1 );

    my $c41 = newick_c4( $node1 );
    my $c42 = newick_c4( $node2 );
    ! defined( $c42 ) || ! @$c42 || set_newick_c4( $node1, $c42 );
    ! defined( $c41 ) || ! @$c41 || set_newick_c4( $node2, $c41 );

    my $c51 = newick_c5( $node1 );
    my $c52 = newick_c5( $node2 );
    ! defined( $c52 ) || ! @$c52 || set_newick_c5( $node1, $c52 );
    ! defined( $c51 ) || ! @$c51 || set_newick_c5( $node2, $c51 );

    reroot_newick_by_path( @rest );           #  Node 2 is first element of rest
}


#-------------------------------------------------------------------------------
#  Move root of tree to named tip.
#
#  $newtree = reroot_newick_to_tip( $tree, $tip )
#-------------------------------------------------------------------------------
sub reroot_newick_to_tip {
    my ( $tree, $tipname ) = @_;
    reroot_newick_by_path( path_to_tip( $tree, $tipname ) );
}


#-------------------------------------------------------------------------------
#  Move root of tree to be node adjacent to a named tip.
#
#  $newtree = reroot_newick_next_to_tip( $tree, $tip )
#-------------------------------------------------------------------------------
sub reroot_newick_next_to_tip {
    my ( $tree, $tipname ) = @_;
    my @path = path_to_tip( $tree, $tipname );
    @path || return undef;
    @path == 1 ? reroot_newick_by_path( $tree, 1, newick_desc_i( $tree, 1 ) )
               : reroot_newick_by_path( @path[0 .. @path-3] );
}


#-------------------------------------------------------------------------------
#  Move root of tree to a node, defined by 1 or 3 tip names.
#
#  $newtree = reroot_newick_to_node( $tree, @node )
#-------------------------------------------------------------------------------
sub reroot_newick_to_node {
    reroot_newick_by_path( path_to_node( @_ ) );
}


#-------------------------------------------------------------------------------
#  Move root of tree to a node, defined by reference.
#
#  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
#-------------------------------------------------------------------------------
sub reroot_newick_to_node_ref {
    my ( $tree, $node ) = @_;
    reroot_newick_by_path( path_to_node_ref( $tree, $node ) );
}


#-------------------------------------------------------------------------------
#  Move root of tree to an approximate midpoint.
#
#  $newtree = reroot_newick_to_approx_midpoint( $tree )
#-------------------------------------------------------------------------------
sub reroot_newick_to_approx_midpoint {
    my ( $tree ) = @_;
    #  Compile average tip to node distances assending

    my $dists1 = average_to_tips_1( $tree );

    #  Compile average tip to node distances descending, returning midpoint node

    my $node = average_to_tips_2( $dists1, undef, undef );

    #  Reroot

    $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree
}


sub average_to_tips_1 {
    my ( $node ) = @_;

    my @desc_dists = map { average_to_tips_1( $_ ) } newick_desc_list( $node );
    my $x_below = 0;
    if ( @desc_dists )
    {
        foreach ( @desc_dists ) { $x_below += $_->[0] }
        $x_below /= @desc_dists;
    }
    my $x = newick_x( $node ) || 0;
    my $x_net = $x_below + $x;

    [ $x_net, $x, $x_below, [ @desc_dists ], $node ]
}

    
sub average_to_tips_2 {
    my ( $dists1, $x_above, $anc_node ) = @_;
    my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1;

    #  Are we done?  Root is in this node's branch, or "above"?

    # defined( $x_above ) and print STDERR "x_above = $x_above\n";
    # print STDERR "x       = $x\n";
    # print STDERR "x_below = $x_below\n";
    # print STDERR "n_desc  = ", scalar @$desc_list, "\n\n";

    if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) )
    {
        #  At this point the root can only be in this node's branch,
        #  or "above" it in the current rooting of the tree (which
        #  would mean that the midpoint is actually down a different
        #  path from the root of the current tree).
        #
        #  Is the root in the current branch?

        if ( ( $x_below + $x ) >= $x_above )
        {
            return ( $x_above >= $x_below ) ? $anc_node : $node;
        }
        else
        {
            return undef;
        }
    }

    #  The root must be some where below this node:

    my $n_1      =   @$desc_list + ( $anc_node ? 1 : 0 ) - 1;
    my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 );

    foreach ( @$desc_list )
    {
        #  If input tree is tip_rooted, $n-1 can be 0, so:

        my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0;
        my $root = average_to_tips_2( $_, $above2, $node );
        if ( $root ) { return $root }
    }

    #  Was not anywhere below this node (oh-oh):

    return undef;
}

    
#-------------------------------------------------------------------------------
#  Move root of tree from tip to adjacent node.
#
#  $newtree = uproot_tip_rooted_newick( $tree )
#-------------------------------------------------------------------------------
sub uproot_tip_rooted_newick {
    my ( $node ) = @_;
    newick_is_tip_rooted( $node ) || return $node;

    #  Path to the sole descendant:

    reroot_newick_by_path( $node, 1, newick_desc_i( $node, 1 ) );
}


#-------------------------------------------------------------------------------
#  Remove root bifurcation.
#
#  Root node label, label comment and descendant list comment are discarded.
#
#  $newtree = uproot_newick( $tree )
#-------------------------------------------------------------------------------
sub uproot_newick {
    my ( $node0 ) = @_;
    newick_is_rooted( $node0 ) || return $node0;

    my ( $node1, $node2 ) = newick_desc_list( $node0 );

    #  Ensure that node1 has at least 1 descendant

    if    ( newick_n_desc( $node1 ) ) {
        push @{ newick_desc_ref( $node1 ) }, $node2;    #  Add node2 to descend list
    }

    #  Or node2 has at least 1 descendant

    elsif ( newick_n_desc( $node2 ) ) {
        unshift @{ newick_desc_ref( $node2 ) }, $node1;   #  Add node1 to descend list
        ( $node1, $node2 ) = ( $node2, $node1 );        #  And reverse labels
    }

    #  We could make this into a tip rooted tree, but for now:

    else { return $node0 }

    #  Prefix node1 branch to that of node2:

    add_to_newick_branch( $node2, $node1 );
    set_newick_x( $node1, undef );

    #  Tree prefix comment lists (as references):

    my $C10 = newick_c1( $node0 );
    my $C11 = newick_c1( $node1 );
    if ( $C11 && @$C11 ) { 
        if ( $C10 && @$C10 ) { unshift @$C11, @$C10 } #  Prefix to node1 comments
    }
    elsif ( $C10 && @$C10 ) {
        set_newick_c1( $node1, $C10 )          #  Or move node0 comments to node1
    }

    $node1;
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  Prefix branch of node2 to that of node1:
#
#  $node1 = add_to_newick_branch( $node1, $node2 )
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub add_to_newick_branch {
    my ( $node1, $node2 ) = @_;
    array_ref( $node1 ) || die "add_to_newick_branch: arg 1 not array ref\n";
    array_ref( $node2 ) || die "add_to_newick_branch: arg 2 not array ref\n";

    #  Node structure template:
    #     my ( $DL, $L, $X, $C1, $C2, $C3, $C4, $C5 ) = @$node;

    #  Fix branch lengths for joining of two branches:

    set_newick_x( $node1, newick_x( $node1 ) + newick_x( $node2 ) );

    #  Merge branch length comments:

    my $C41 = newick_c4( $node1 );  #  Ref to node1 C4
    my $C42 = newick_c4( $node2 );  #  Ref to node2 C4
    if ( $C41 && @$C41 ) {
        if ( $C42 && @$C42 ) { unshift @$C41, @$C42 }         #  Add node2 comment
    }
    elsif ( $C42 && @$C42 ) { set_newick_c4( $node1, $C42 ) } #  Or move node1 comment

    my $C51 = newick_c5( $node1 );  #  Ref to node1 C5
    my $C52 = newick_c5( $node2 );  #  Ref to node2 C5
    if ( $C51 && @$C51 ) {
        if ( $C52 && @$C52 ) { unshift @$C51, @$C52 }         #  Add node2 comment
    }
    elsif ( $C52 && @$C52 ) { set_newick_c5( $node1, $C52 ) } #  Or move node1 comment

    $node1;
}


#-------------------------------------------------------------------------------
#  Prune one or more tips from a tree:
#     Caveat:  if one tip is listed, the original tree is modified.
#              if more than one tip is listed, a copy of the tree is returen
#                   (even if it is just listing the same tip twice!).
#
#  $newtree = prune_from_newick( $tree,  $tip  )
#  $newtree = prune_from_newick( $tree,  @tips )
#  $newtree = prune_from_newick( $tree, \@tips )
#-------------------------------------------------------------------------------
sub prune_from_newick {
    my ( $tr, @tips ) = @_;
    if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }
    
    if ( @tips == 0 ) { return $tr }
    if ( @tips == 1 ) { return prune_1_from_newick( $tr, @tips ) }

    my %del  = map  { ( $_, 1 ) } @tips;
    my @keep = grep { ! $del{ $_ } } newick_tip_list( $tr );
    newick_subtree( $tr, @keep );
}


#-------------------------------------------------------------------------------
#  Prune a tip from a tree:
#
#  $newtree = prune_1_from_newick( $tree, $tip )
#-------------------------------------------------------------------------------
sub prune_1_from_newick {
    my ( $tr, $tip ) = @_;
    my @path = path_to_tip( $tr, $tip );
    if ( @path < 3 ) { return $tr }

    my $node = $path[-1];  #  Node with the tip
    my $i1   = $path[-2];  #  Descendant number of node in ancestor desc list
    my $anc1 = $path[-3];  #  Ancestor of node
    my $nd1  = newick_n_desc( $anc1 );  #  Number of descendants of ancestor
    my $anc2 = ( @path >= 5 ) ? $path[-5] : undef; # Ancestor of anc1

    # dump_tree( $node );
    # print STDERR "i1 = $i1\n";
    # dump_tree( $anc1 );
    # print STDERR "nd1 = $nd1\n";
    # defined( $anc2 ) && dump_tree( $anc2 );

    if ( $nd1 > 3 || ( $anc2 && $nd1 > 2 ) ) {   # Tip joins at multifurcation
        splice( @{ $anc1->[0] }, $i1-1, 1 );     #    delete the descendant
    }

    elsif ( $anc2 ) {                            # Tip joins at internal bifurcation
        my $sis = newick_desc_i( $anc1, 3-$i1 );     # find sister node
        add_to_newick_branch( $sis, $anc1 );         # combine internal branches
        set_newick_desc_i( $anc2, $path[-4], $sis ); # remove $anc1
    }

    elsif ( $nd1 == 2) {                         # Tip joins bifurcating root node
        my $sis = newick_desc_i( $anc1, 3-$i1 ); #    find sister node
        $sis->[1] = $anc1->[1] if ! $sis->[1] && $anc1->[1];  # root label
        $sis->[2] = undef;                                    # root branch len
        $sis->[3] = $anc1->[3] if ! $sis->[3] && $anc1->[3];  # tree comment
        $sis->[4] = $anc1->[4] if ! $sis->[4] && $anc1->[4];  # desc list comment
        $sis->[5] = $anc1->[5] if ! $sis->[5] && $anc1->[5];  # label comment 
        $sis->[6] = undef      if   $sis->[6];   #    root branch comment
        $sis->[7] = undef      if   $sis->[7];   #    root branch comment
        $tr = $sis;                              #    sister is new root
    }

    elsif ( $nd1 == 3 ) {                        # Tip joins trifurcating root:
        splice( @{ $anc1->[0] }, $i1-1, 1 );     #    delete the descendant, and
        $tr = uproot_newick( $tr );              #    fix the rooting
    }

    else {
        return undef;
    }

    return $tr;
}


#-------------------------------------------------------------------------------
#  Produce a subtree with the desired tips:
#
#     Except for (some) tip nodes, the tree produced is a copy.
#     There is no check that requested tips exist.
#
#  $newtree = newick_subtree( $tree,  @tips )
#  $newtree = newick_subtree( $tree, \@tips )
#-------------------------------------------------------------------------------
sub newick_subtree {
    my ( $tr, @tips ) = @_;
    if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } }

    if ( @tips < 2 ) { return undef }
    my $was_rooted = newick_is_rooted( $tr );
    my $keephash = { map { ( $_, 1 ) } @tips };
    my $tr2 = subtree1( $tr, $keephash );
    $tr2 = uproot_newick( $tr2 ) if ! $was_rooted && newick_is_rooted( $tr2 );
    $tr2->[2] = undef if $tr2;                   # undef root branch length
    $tr2;
}


sub subtree1 {
    my ( $tr, $keep ) = @_;
    my @desc1 = newick_desc_list( $tr );

    #  Is this a tip, and is it in the keep list?

    if ( @desc1 < 1 ) {
        return ( $keep->{ newick_lbl( $tr ) } ) ? $tr : undef;
    }

    #  Internal node: analyze the descendants:

    my @desc2 = ();
    foreach ( @desc1 ) {
        my $desc = subtree1( $_, $keep );
        if ( $desc && @$desc ) { push @desc2, $desc }
    }

    if ( @desc2 == 0 ) { return undef }
    if ( @desc2 >  1 ) { return [ \@desc2, @$tr[ 1 .. @$tr - 1 ] ] }

    #  Exactly 1 descendant

    my $desc = $desc2[ 0 ];
    my @nn = ( $desc->[0],
               $desc->[1] ? $desc->[1] : $tr->[1],
               defined( $tr->[2] ) ? $desc->[2] + $tr->[2] : undef
             );

    #  Merge comments (only recreating the ones that existed):

    if ( $tr->[3] && @{$tr->[3]} || $desc->[3] && @{$desc->[3]} ) {
        $nn[3] = [ $tr->[3] ? @{$tr->[3]} : (), $desc->[3] ? @{$desc->[3]} : () ];
    }
    if ( $tr->[4] && @{$tr->[4]} || $desc->[4] && @{$desc->[4]} ) {
        $nn[4] = [ $tr->[4] ? @{$tr->[4]} : (), $desc->[4] ? @{$desc->[4]} : () ];
    }
    if ( $tr->[5] && @{$tr->[5]} || $desc->[5] && @{$desc->[5]} ) {
        $nn[5] = [ $tr->[5] ? @{$tr->[5]} : (), $desc->[5] ? @{$desc->[5]} : () ];
    }
    if ( $tr->[6] && @{$tr->[6]} || $desc->[6] && @{$desc->[6]} ) {
        $nn[6] = [ $tr->[6] ? @{$tr->[6]} : (), $desc->[6] ? @{$desc->[6]} : () ];
    }
    if ( $tr->[7] && @{$tr->[7]} || $desc->[7] && @{$desc->[7]} ) {
        $nn[7] = [ $tr->[7] ? @{$tr->[7]} : (), $desc->[7] ? @{$desc->[7]} : () ];
    }

    return \@nn;
}


#===============================================================================
#
#  Tree writing and reading
#
#===============================================================================
#  writeNewickTree( $tree [, $file ] )
#-------------------------------------------------------------------------------
sub writeNewickTree {
    my ( $tree, $file ) = @_;
    $file || ( $file = *STDOUT );
    print  $file  ( strNewickTree( $tree ), "\n" );
}


#-------------------------------------------------------------------------------
#  fwriteNewickTree( $file, $tree )     #  Args reversed to writeNewickTree
#-------------------------------------------------------------------------------
sub fwriteNewickTree { writeNewickTree( $_[1], $_[0] ) }


#-------------------------------------------------------------------------------
#  $treestring = strNewickTree( $tree )
#-------------------------------------------------------------------------------
sub strNewickTree {
    my $node = shift @_;
    strNewickSubtree( $node, "" ) . ";";
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  $string = strNewickSubtree( $node, $prefix )
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub strNewickSubtree {
    my ( $node, $prefix ) = @_;
    my  $s;

    $s = strNewickComments( newick_c1( $node ), $prefix );
    if ( $s ) { $prefix = " " }

    my $ndesc;
    if ( $ndesc = newick_n_desc( $node ) ) {
        for (my $d = 1; $d <= $ndesc; $d++) {
            $s .= ( ( $d == 1 )  ?  $prefix . "("  :  "," )
               .  strNewickSubtree( newick_desc_i( $node, $d ), " " );
        }

        $s .= ")" . strNewickComments( newick_c2( $node ), " " );
        $prefix = " ";
    }

    if ( defined( newick_lbl( $node ) ) && newick_lbl( $node ) ) {
        $s .= $prefix
           .  q_newick_lbl( $node )
           .  strNewickComments( newick_c3( $node ), " " );
    }

    if ( defined( newick_x( $node ) ) ) {
        $s .= ":"
           .   strNewickComments( newick_c4( $node ), " " )
           .   sprintf( " %.6f", newick_x( $node ) )
           .   strNewickComments( newick_c5( $node ), " " );
    }

    $s;
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  $string = strNewickComments( $clist, $prefix )
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub strNewickComments {
    my ( $clist, $prefix ) = @_;
    array_ref( $clist ) && ( @$clist > 0 ) || return  "";
    $prefix . "[" . join( "] [", @$clist ) . "]";
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  $quoted_label = q_newick_lbl( $label )
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub q_newick_lbl {
    my $lbl = newick_lbl( $_[0] );
    defined( $lbl ) && ( $lbl ne "" ) || return undef;

    if ( $lbl =~ m/^[^][()_:;,]+$/        #  Anything but []()_:;,
      && $lbl !~ m/^'/  ) {               #     and does not start with '
        $lbl =~ s/ /_/g;                  #  Recode blanks as _
        return $lbl;
    }

    else {
        $lbl =~ s/'/''/g;                 #  Double existing single quote marks
        return  q(') . $lbl . q(');       #  Wrap in single quote marks
    }
}


#===============================================================================
#  $treestring = formatNewickTree( $tree )
#===============================================================================
sub formatNewickTree {
    formatNewickSubtree( $_[0], "", "" ) . ";";
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  $string = formatNewickSubtree( $node, $prefix, $indent )
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub formatNewickSubtree {
    my ( $node, $prefix, $indent ) = @_;
    my  $s;

    $s = formatNewickComments( newick_c1( $node ), $prefix, $indent );
    if ( $s ) { $prefix = "\n$indent" }

    if ( my $ndesc = newick_n_desc( $node ) ) {
        for (my $d = 1; $d <= $ndesc; $d++) {
            $s .= ( ( $d == 1 )  ?  $prefix . "("  :  ",\n$indent " )
               .  formatNewickSubtree( newick_desc_i( $node, $d ), " ", $indent . "  " );
        }

        $s .= "\n$indent)" . formatNewickComments( newick_c2( $node ), " ", $indent );
        $prefix = " ";
    }

    if ( defined( newick_lbl( $node ) ) && newick_lbl( $node ) ) {
        $s .= $prefix
           .  q_newick_lbl( $node )
           .  formatNewickComments( newick_c3( $node ), " ", $indent );
    }

    if ( defined( newick_x( $node ) ) ) {
        $s .= ":"
           .   formatNewickComments( newick_c4( $node ), " ", $indent )
           .   sprintf(" %.6f", newick_x( $node ) )
           .   formatNewickComments( newick_c5( $node ), " ", $indent );
    }

    $s;
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  $string = formatNewickComments( $clist, $prefix, $indent )
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub formatNewickComments {
    my ( $clist, $prefix, $indent ) = @_;
    array_ref( $clist ) && @$clist || return  "";
    $prefix . "[" . join( "] [", @$clist ) . "]";
}


#===============================================================================
#  Tree reader adapted from the C language reader in fastDNAml
#
#  $tree = parse_newick_tree_str( $string )
#===============================================================================
sub parse_newick_tree_str {
    my $s = shift @_;

    my ( $ind, $rootnode ) = parse_newick_subtree( $s, 0 );
    if ( substr( $s, $ind, 1 ) ne ";") { warn "warning: tree missing ';'\n" }
    $rootnode;
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  Read a subtrees recursively (everything of tree but a semicolon)
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub parse_newick_subtree {
    my ( $s, $ind ) = @_;

    my $newnode = [];
    my @dlist   = ();
    my ( $lbl, $x, $c1, $c2, $c3, $c4, $c5 );
 
    ( $ind, $c1 ) = getNextTreeChar( $s, $ind );       #  Comment 1
    if ( ! defined( $ind ) ) { treeParseError( "missing subtree" ) }
    if ( $c1 && @$c1 ) { set_newick_c1( $newnode, $c1 ) }

    if ( substr( $s, $ind, 1 ) eq "(" ) {                #  New internal node
        while ( ! @dlist || ( substr( $s, $ind, 1 ) eq "," ) ) {
            my $desc;
            ( $ind, $desc ) = parse_newick_subtree( $s, $ind+1 );
            if (! $ind) { return () }
            push @dlist, $desc;
        }
        if ( substr( $s, $ind, 1 ) ne ")" ) { treeParseError( "missing ')'" ) }

        ( $ind, $c2 ) = getNextTreeChar( $s, $ind+1 );   #  Comment 2
        if ( $c2 && @$c2 ) { set_newick_c2( $newnode, $c2 ) }
        ( $ind, $lbl ) = parseTreeNodeLabel( $s, $ind ); #  Node label
    }

    elsif ( substr( $s, $ind, 1 ) =~ /[^][(,):;]/ ) {    #  New tip
        ( $ind, $lbl ) = parseTreeNodeLabel( $s, $ind ); #  Tip label
        if (! $ind) { return () }
    }

    @dlist || $lbl || treeParseError( "no descendant list or label" );

    if ( @dlist ) { set_newick_desc_ref( $newnode, \@dlist ) }
    if ( $lbl   ) { set_newick_lbl( $newnode, $lbl ) }

    ( $ind, $c3 ) = getNextTreeChar( $s, $ind );         #  Comment 3
    if ( $c3 && @$c3 ) { set_newick_c3( $newnode, $c3 ) }

    if (substr( $s, $ind, 1 ) eq ":") {                  #  Branch length
        ( $ind, $c4 ) = getNextTreeChar( $s, $ind+1 );   #  Comment 4
        if ( $c4 && @$c4 ) { set_newick_c4( $newnode, $c4 ) }
        ( $ind, $x ) = parseBranchLength( $s, $ind );
        if ( defined( $x ) ) { set_newick_x( $newnode, $x ) }
        ( $ind, $c5 ) = getNextTreeChar( $s, $ind );     #  Comment 5
        if ( $c5 && @$c5 ) { set_newick_c5( $newnode, $c5 ) }
    }

    ( $ind, $newnode );
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  Read a Newick tree label
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub parseTreeNodeLabel {  #  Empty string is permitted
    my ( $s, $ind ) = @_;
    my ( $lbl, $c );

    if ( substr( $s, $ind, 1 ) eq "'") {
        my $ind1 = ++$ind;

        while ( ) {
            if ( ! defined( $c = substr( $s, $ind, 1 ) ) || $c eq "" ) {
                treeParseError( "missing close quote on label '" . substr( $s, $ind1 ) . "'" )
            }
            elsif ( $c ne "'"  )                  { $ind++ }
            elsif ( substr( $s, $ind, 2 ) eq "''" ) { $ind += 2 }
            else                                    { last }
        }

        $lbl = substr( $s, $ind1, $ind-$ind1 );
        $lbl =~ s/''/'/g;
        $ind++;
    }

    else {
        my $ind1 = $ind;
        while ( defined( $c = substr($s, $ind, 1) ) && $c ne "" && $c !~ /[][\s(,):;]/ ) { $ind++ }
        $lbl = substr( $s, $ind1, $ind-$ind1 );
        $lbl =~ s/_/ /g;
    }

    ( $ind, $lbl );
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  Read a Newick tree branch length
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub parseBranchLength {
    my ( $s, $ind ) = @_;

    my $c = substr( $s, $ind, 1 );

    my $sign = ( $c ne "-" ) ? 1 : -1;      #  Sign
    if ( $c =~ /[-+]/ ) { $c = substr( $s, ++$ind, 1 ) }

    if ( $c !~ /^[.0-9]$/ ) {   #  Allows starting with decimal
        treeParseError( "invalid branch length character '$c'" )
    }

    my $v = 0;
    while ( $c =~ /[0-9]/ ) {               #  Whole number
        $v = 10 * $v + $c;
        $c = substr( $s, ++$ind, 1 );
    }

    if ( $c eq "." ) {                      #  Fraction
        my $f = 0.1;
        $c = substr( $s, ++$ind, 1 );
        while ( $c =~ /[0-9]/ ) {
            $v += $f * $c;
            $f *= 0.1;
            $c  = substr( $s, ++$ind, 1 );
        }
    }

    $v *= $sign;

    if ( $c =~ /[dDeEgG]/ ) {                 #  Exponent
        $c = substr( $s, ++$ind, 1 );
        my $esign = ( $c ne "-" ) ? 1 : -1;
        if ( $c =~ /^[-+]$/ ) { $c = substr( $s, ++$ind, 1 ) }
        if ( $c !~ /^[0-9]$/ ) {
            treeParseError( "missing branch length exponent '$c'" )
        }

        my $e = 0;
        while ( $c =~ /[0-9]/ ) {
            $e = 10 * $e + $c;
            $c = substr( $s, ++$ind, 1 );
        }
        $e *= $esign;
        $v *= 10**$e;
    }

    ( $ind, $v );
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  ( $index, /@commentlist ) = getNextTreeChar( $string, $index )
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub getNextTreeChar {       #  Move to next nonblank, noncomment character
    my ( $s, $ind ) = @_;

    my @clist = ();

    #  Skip white space
    if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }

    #  Loop while it is a comment:
    while ( substr( $s, $ind, 1 ) eq "[" ) {
        $ind++;

        #  Find end
        if ( substr( $s, $ind ) !~ /^([^]]*)\]/ ) {
            treeParseError( "comment missing closing bracket '["
                           . substr( $s, $ind ) . "'" )
        }
        my $comment = $1;

        #  Save if it includes any "text"
        if ( $comment =~ m/\S/ ) { push @clist, $comment }

        $ind += length( $comment ) + 1;     #  Comment plus closing bracket

        #  Skip white space
        if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) }
    }

    ( $ind, @clist ? \@clist : undef )
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  treeParseError( $message )
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub treeParseError { die "Error: parse_newick_subtree: " . $_[0] . "\n" }


#===============================================================================
#  Make a printer plot of a tree:
#
#     $node   newick tree root node
#     $file   undef (= *STDOUT), *STDOUT, *STDERR, or a file name.
#     $width  the approximate characters for the tree without labels
#     $min_dx the minimum horizontal branch length
#     $dy     the vertical space per taxon
#
#  printer_plot_newick( $node, $file (D=*STDOUT), $width (D=68), $min_dx (D=2), $dy (D=1) )
#===============================================================================
sub printer_plot_newick {
    my ( $node, $file, $width, $min_dx, $dy ) = @_;

    my ( $fh, $close );
    if ( ! defined( $file ) ) {
        $fh = *STDOUT;
    }
    elsif ( $file =~ /^\*/ ) {
        $fh = $file;
    }
    else {
        open $fh, ">$file" or die "Could not open $file for writing printer_plot_newick\n";
        $close = 1;
    }

    print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";
    if ( $close ) { close $fh }
}


#===============================================================================
#  Make a text plot of a tree:
#
#     $node   newick tree root node
#     $width  the approximate characters for the tree without labels
#     $min_dx the minimum horizontal branch length
#     $dy     the vertical space per taxon
#
#  @textlines = text_plot_newick( $node, $width (D=68), $min_dx (D=2), $dy (D=1) )
#===============================================================================
sub text_plot_newick {
    my ( $node, $width, $min_dx, $dy ) = @_;
    array_ref( $node ) || die "Bad node passed to text_plot_newick\n";
    defined( $min_dx ) and ( $min_dx >=  0 ) or $min_dx =  2;
    defined(     $dy ) and (     $dy >=  1 ) or     $dy =  1;
    defined( $width  )                       or  $width = 68;

    $min_dx = int( $min_dx );
    $dy     = int( $dy );
    my $x_scale = $width / newick_max_X( $node );

    my $hash = {};
    layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy );

    # dump_tree_hash( $node, $hash ); exit;

    #  Generate the lines of the tree one by one:

    my ( $y1, $y2 ) = @{ $hash->{ $node } };
    map { text_tree_row( $node, $hash, $_, "", "+" ) } ( $y1 .. $y2 );
}

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy )
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub layout_printer_plot {
    my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy ) = @_;
    array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n";
    hash_ref(  $hash ) || die "Bad hash ref passed to layout_printer_plot\n";

    my $dx = newick_x( $node );
    if ( defined( $dx ) ) {
        $dx *= $x_scale;
        $dx >= $min_dx or $dx = $min_dx;
    }
    else {
        $dx = ( $x0 > 0 ) ? $min_dx : 0;
    }
    $dx = int( $dx + 0.4999 );

    my ( $x, $xmax, $y, $ymax, $y1, $y2, $yn1, $yn2 );

    $x = $x0 + $dx;
    $y1 = int( $y0 + 0.5 * $dy + 0.4999 );
    my @dl = newick_desc_list( $node );

    if ( ! @dl ) {               #  A tip
        $xmax  = $x;
        $y = $yn1 = $yn2 = $y2 = $y1;
        $ymax  = $y + 0.5 * $dy;
    }

    else {                       #  A subtree
        $xmax = -1;
        my $xmaxi;
        my $yi;
        my @ylist = ();
        $ymax = $y0;

        foreach ( @dl ) {
            ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy );
            push @ylist, $yi;
            if ( $xmaxi > $xmax ) { $xmax = $xmaxi }
        }

        #  Use of y-list is overkill for saving first and last values,
        #  but eases implimentation of alternative y-value calculations.

        $yn1 = $ylist[ 0];
        $yn2 = $ylist[-1];
        $y = int( 0.5 * ( $yn1 + $yn2 ) + 0.4999 );
    }

    $y2 = int( $ymax - 0.5 * $dy + 0.4999 );

    $hash->{ $node } = [ $y1, $y2, $x0, $x, $y, $yn1, $yn2 ];
    ( $xmax, $ymax, $y );
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  Debug routine
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub dump_tree {
    my ( $node, $prefix ) = @_;
    defined( $prefix ) or $prefix = "";
    print STDERR $prefix, join(", ", @$node), "\n";
    my @dl = $node->[0] ? @{$node->[0]} : ();
    foreach ( @dl ) { dump_tree( $_, $prefix . "  " ) }
    $prefix or print STDERR "\n";
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  Debug routine
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub dump_tree_hash {
    my ( $node, $hash, $prefix ) = @_;
    defined( $prefix ) or print STDERR "node; [ y1, y2, x0, x, y, yn1, yn2 ]\n" and $prefix = "";
    print STDERR $prefix, join(", ", @$node), "; ", join(", ", @{ $hash->{ $node } } ), "\n";
    my @dl = $node->[0] ? @{$node->[0]} : ();
    foreach (@dl) { dump_tree_hash( $_, $hash, $prefix . "  " ) }
}


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#  $line = text_tree_row( $node, $hash, $row, $line, $symb )
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub text_tree_row {
    my ( $node, $hash, $row, $line, $symb ) = @_;

    my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };
    if ( $row < $y1 || $row > $y2 ) { return $line }

    if ( length( $line ) < $x0 ) { $line .= " " x ( $x0 - length( $line ) ) }

    if ( $row == $y ) {
        $line = substr( $line, 0, $x0 ) . $symb . (( $x > $x0 ) ? "-" x ($x - $x0) : "");
    }

    elsif ( $row > $yn1 && $row < $yn2 ) {
        if ( length( $line ) < $x ) { $line .= " " x ( $x - length( $line ) ) . "|" }
        else { substr( $line, $x ) = "|" }
    }

    my @dl = newick_desc_list( $node );

    if ( @dl < 1 ) {
        $line .= " " . $node->[1];
    }

    else {
        my @list = map { [ $_, "+" ] } @dl;  #  Print symbol for line
        $list[ 0]->[1] = "/";
        $list[-1]->[1] = "\\";

        foreach ( @list ) {
            my ( $n, $s ) = @$_;
            if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {
                $line = text_tree_row( $n, $hash, $row, $line, $s );
            }
         }

        if ( $row == $y ) { substr( $line, $x, 1 ) = "+" }
    }

    return $line;
}


1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3