[Bio] / FigKernelPackages / gjonewicklib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjonewicklib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5, Tue Dec 19 17:32:48 2006 UTC revision 1.9, Sun Feb 11 18:55:44 2007 UTC
# Line 1  Line 1 
1  #  #
2  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2007 University of Chicago and Fellowship
3  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
4  #  #
5  # This file is part of the SEED Toolkit.  # This file is part of the SEED Toolkit.
# Line 44  Line 44 
44  #  #
45  #     $tree = \@rootnode;  #     $tree = \@rootnode;
46  #  #
47  #     @node = ( \@desc,  #  reference to list of descendants  #     $node = [ \@desc,  #  reference to list of descendants
48  #                $label, #  node label  #                $label, #  node label
49  #                $x,     #  branch length  #                $x,     #  branch length
50  #               \@c1,    #  reference to comment list 1  #               \@c1,    #  reference to comment list 1
# Line 52  Line 52 
52  #               \@c3,    #  reference to comment list 3  #               \@c3,    #  reference to comment list 3
53  #               \@c4,    #  reference to comment list 4  #               \@c4,    #  reference to comment list 4
54  #               \@c5     #  reference to comment list 5  #               \@c5     #  reference to comment list 5
55  #             )  #             ]
56  #  #
57  #  At present, no routine tests or enforces the length of the list (a single  #  At present, no routine tests or enforces the length of the list (a single
58  #  element list could be a valid internal node).  #  element list could be a valid internal node).
# Line 63  Line 63 
63  #  time, but is different from the prolog representation.  #  time, but is different from the prolog representation.
64  #  #
65  #  #
66    #  Ross Overbeek has a different tree node structure:
67    #
68    #     $node = [ Label,
69    #               DistanceToParent,
70    #               [ ParentPointer, ChildPointer1, ... ],
71    #               [ Name1\tVal1, Name2\tVal2, ... ]
72    #             ]
73    #
74    #  So:
75    #
76    #===============================================================================
77    #  Tree format interconversion:
78    #===============================================================================
79    #
80    #  $bool      = is_overbeek_tree( $tree )
81    #  $bool      = is_gjonewick_tree( $tree )
82    #
83    #  $gjonewick = overbeek_to_gjonewick( $overbeek )
84    #  $overbeek  = gjonewick_to_overbeek( $gjonewick )
85    #
86  #===============================================================================  #===============================================================================
87  #  Tree data extraction:  #  Tree data extraction:
88  #===============================================================================  #===============================================================================
# Line 90  Line 110 
110  #  set_newick_c4( $noderef, $listref )  #  set_newick_c4( $noderef, $listref )
111  #  set_newick_c5( $noderef, $listref )  #  set_newick_c5( $noderef, $listref )
112  #  set_newick_desc_list( $noderef, @desclist )  #  set_newick_desc_list( $noderef, @desclist )
113  #  set_newick_desc_i( $noderef1, $i, $noderef2)  #  set_newick_desc_i( $noderef1, $i, $noderef2 )  # 1-based numbering
114    #
115    #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid
116  #  #
117  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root
118  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root
# Line 99  Line 121 
121  #  #
122  #  $n       = newick_tip_count( $noderef )  #  $n       = newick_tip_count( $noderef )
123  #  @tiprefs = newick_tip_ref_list( $noderef )  #  @tiprefs = newick_tip_ref_list( $noderef )
124    # \@tiprefs = newick_tip_ref_list( $noderef )
125  #  @tips    = newick_tip_list( $noderef )  #  @tips    = newick_tip_list( $noderef )
126    # \@tips    = newick_tip_list( $noderef )
127    #
128  #  $tipref  = newick_first_tip_ref( $noderef )  #  $tipref  = newick_first_tip_ref( $noderef )
129  #  $tip     = newick_first_tip( $noderef )  #  $tip     = newick_first_tip( $noderef )
130    #
131  #  @tips    = newick_duplicated_tips( $noderef )  #  @tips    = newick_duplicated_tips( $noderef )
132    # \@tips    = newick_duplicated_tips( $noderef )
133    #
134  #  $bool    = newick_tip_in_tree( $noderef, $tipname )  #  $bool    = newick_tip_in_tree( $noderef, $tipname )
135    #
136  #  @tips    = newick_shared_tips( $tree1, $tree2 )  #  @tips    = newick_shared_tips( $tree1, $tree2 )
137    # \@tips    = newick_shared_tips( $tree1, $tree2 )
138  #  #
139  #  $length  = newick_tree_length( $noderef )  #  $length  = newick_tree_length( $noderef )
140    #
141    #  %tip_distances = newick_tip_distances( $noderef )
142    # \%tip_distances = newick_tip_distances( $noderef )
143    #
144  #  $xmax    = newick_max_X( $noderef )  #  $xmax    = newick_max_X( $noderef )
145  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )
146  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )  #  ( $tipname, $xmax ) = newick_most_distant_tip_name( $noderef )
147    #
148    #  Tree tip insertion point (tip is on branch of length x that
149    #  is inserted into branch connecting node1 and node2, a distance
150    #  x1 from node1 and x2 from node2):
151    #
152    #  [ $node1, $x1, $node2, $x2, $x ] = newick_tip_insertion_point( $tree, $tip )
153    #
154    #  Standardized label for a node in terms of intersection of 3 lowest sorting
155    #  tips (sort is lower case):
156    #
157    #  @TipOrTips = std_node_name( $tree, $node )
158  #  #
159  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
160  #  Paths from root of tree:  #  Paths from root of tree:
# Line 140  Line 185 
185  #  $treecopy = copy_newick_tree( $tree )  #  $treecopy = copy_newick_tree( $tree )
186  #  #
187  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
188  #  The following modify the existing tree, and passibly any components of that  #  The following modify the existing tree, and possibly any components of that
189  #  tree that are reached by reference.  If the old version is still needed, copy  #  tree that are reached by reference.  If the old version is still needed, copy
190  #  before modifying.  #  before modifying.
191  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 157  Line 202 
202  #  $n_changed = newick_set_undefined_branches( $node, $x )  #  $n_changed = newick_set_undefined_branches( $node, $x )
203  #  $n_changed = newick_set_all_branches( $node, $x )  #  $n_changed = newick_set_all_branches( $node, $x )
204  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
205    #  $node      = newick_rescale_branches( $node, $factor )
206    #
207    #  Modify comments:
208    #
209    #  $node = newick_strip_comments( $node )
210  #  #
211  #  Modify rooting and/or order:  #  Modify rooting and/or order:
212  #  #
# Line 170  Line 220 
220  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )  #  $newtree = reroot_newick_next_to_tip( $tree, $tip )
221  #  $newtree = reroot_newick_to_node( $tree, @node )  #  $newtree = reroot_newick_to_node( $tree, @node )
222  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )
223  #  $newtree = reroot_newick_to_node_ref( $tree, $noderef )  #  $newtree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
224  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted  #  $newtree = reroot_newick_to_approx_midpoint( $tree )    # unweighted
225  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips  #  $newtree = reroot_newick_to_approx_midpoint_w( $tree )  # weight by tips
226  #  $newtree = uproot_tip_rooted_newick( $tree )  #  $newtree = uproot_tip_rooted_newick( $tree )
# Line 182  Line 232 
232  #  #
233  #  $newtree = collapse_zero_length_branches( $tree )  #  $newtree = collapse_zero_length_branches( $tree )
234  #  #
235    #  $node = newick_insert_at_node( $node, $subtree )
236    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
237    #
238    #===============================================================================
239    #  Tree neighborhood: subtree of n tips to represent a larger tree.
240    #===============================================================================
241    #
242    #  Focus around root:
243    #
244    #  $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
245    #  $subtree = root_neighborhood_representative_tree( $tree, $n )
246    #  @tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
247    #  @tips    = root_neighborhood_representative_tips( $tree, $n )
248    # \@tips    = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
249    # \@tips    = root_neighborhood_representative_tips( $tree, $n )
250    #
251    #  Focus around a tip insertion point (the tip is not in the subtree):
252    #
253    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
254    #  $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
255    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
256    #  @tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
257    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
258    # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
259    #
260  #===============================================================================  #===============================================================================
261  #  Tree reading and writing:  #  Tree reading and writing:
262  #===============================================================================  #===============================================================================
263    #  Write machine-readable trees:
264  #  #
265  #   writeNewickTree( $tree )  #   writeNewickTree( $tree )
266  #   writeNewickTree( $tree, $file )  #   writeNewickTree( $tree, $file )
267  #   writePrettyTree( $tree, $file )  #   writeNewickTree( $tree, \*FH )
268  #  fwriteNewickTree( $file, $tree )  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O
269  #  $treestring = swriteNewickTree( $tree )  #  $treestring = swriteNewickTree( $tree )
270  #  $treestring = formatNewickTree( $tree )  #  $treestring = formatNewickTree( $tree )
271    #
272    #  Write human-readable trees:
273    #
274  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )
275  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )
276  #  #
277    #  Read trees:
278    #
279    #  $tree  = read_newick_tree( $file )  # reads to a semicolon
280    #  @trees = read_newick_trees( $file ) # reads to end of file
281  #  $tree = parse_newick_tree_str( $string )  #  $tree = parse_newick_tree_str( $string )
282  #  #
283  #===============================================================================  #===============================================================================
284    
285    
286    use Carp;
287    use Data::Dumper;
288    use strict;
289    
290  require Exporter;  require Exporter;
291    
292  our @ISA = qw(Exporter);  our @ISA = qw(Exporter);
293  our @EXPORT = qw(  our @EXPORT = qw(
294            is_overbeek_tree
295            is_gjonewick_tree
296            overbeek_to_gjonewick
297            gjonewick_to_overbeek
298            newick_is_valid
299          newick_is_rooted          newick_is_rooted
300          newick_is_unrooted          newick_is_unrooted
301          tree_rooted_on_tip          tree_rooted_on_tip
302          newick_is_bifurcating          newick_is_bifurcating
303          newick_tip_count          newick_tip_count
304            newick_tip_ref_list
305          newick_tip_list          newick_tip_list
306    
307          newick_first_tip          newick_first_tip
308          newick_duplicated_tips          newick_duplicated_tips
309          newick_tip_in_tree          newick_tip_in_tree
310          newick_shared_tips          newick_shared_tips
311    
312          newick_tree_length          newick_tree_length
313            newick_tip_distances
314          newick_max_X          newick_max_X
315          newick_most_distant_tip_ref          newick_most_distant_tip_ref
316          newick_most_distant_tip_name          newick_most_distant_tip_name
317    
318            newick_tip_insertion_point
319    
320          std_newick_name          std_newick_name
321    
322          path_to_tip          path_to_tip
# Line 241  Line 338 
338          newick_set_undefined_branches          newick_set_undefined_branches
339          newick_set_all_branches          newick_set_all_branches
340          newick_fix_negative_branches          newick_fix_negative_branches
341            newick_rescale_branches
342    
343            newick_strip_comments
344    
345          normalize_newick_tree          normalize_newick_tree
346          reverse_newick_tree          reverse_newick_tree
# Line 254  Line 354 
354          reroot_newick_next_to_tip          reroot_newick_next_to_tip
355          reroot_newick_to_node          reroot_newick_to_node
356          reroot_newick_to_node_ref          reroot_newick_to_node_ref
357            reroot_newick_between_nodes
358          reroot_newick_to_approx_midpoint          reroot_newick_to_approx_midpoint
359          reroot_newick_to_approx_midpoint_w          reroot_newick_to_approx_midpoint_w
360          uproot_tip_rooted_newick          uproot_tip_rooted_newick
# Line 263  Line 364 
364          newick_subtree          newick_subtree
365          collapse_zero_length_branches          collapse_zero_length_branches
366    
367            newick_insert_at_node
368            newick_insert_between_nodes
369    
370            root_neighborhood_representative_tree
371            root_neighborhood_representative_tips
372            tip_neighborhood_representative_tree
373            tip_neighborhood_representative_tips
374    
375          writeNewickTree          writeNewickTree
376          fwriteNewickTree          fwriteNewickTree
377          strNewickTree          strNewickTree
378          formatNewickTree          formatNewickTree
379    
380            read_newick_tree
381            read_newick_trees
382          parse_newick_tree_str          parse_newick_tree_str
383    
384          printer_plot_newick          printer_plot_newick
# Line 305  Line 417 
417          );          );
418    
419    
 use gjolists qw(  
         common_prefix  
         common_and_unique  
         unique_suffixes  
   
         unique_set  
         duplicates  
         random_order  
   
         union  
         intersection  
         set_difference  
         );  
   
   
 use strict;  
   
   
420  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
421  #  Internally used definitions  #  Internally used definitions
422  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 332  Line 426 
426    
427    
428  #===============================================================================  #===============================================================================
429    #  Interconvert overbeek and gjonewick trees:
430    #===============================================================================
431    
432    sub is_overbeek_tree { array_ref( $_[0] ) && array_ref( $_[0]->[2] ) }
433    
434    sub is_gjonewick_tree { array_ref( $_[0] ) && array_ref( $_[0]->[0] ) }
435    
436    sub overbeek_to_gjonewick
437    {
438        return () unless ref( $_[0] ) eq 'ARRAY';
439        my ( $lbl, $x, $desc ) = @{ $_[0] };
440        my ( undef, @desc ) = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : ();
441        [ [ map { overbeek_to_gjonewick( $_ ) } @desc ], $lbl, $x ]
442    }
443    
444    sub gjonewick_to_overbeek
445    {
446        return () unless ref( $_[0] ) eq 'ARRAY';
447        my ( $desc, $lbl, $x ) = @{ $_[0] };
448        my @desc = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : ();
449        my $parent = $_[1];
450        my $node = [ $lbl, $x, undef, [] ];
451        $node->[2] = [ $parent, map { gjonewick_to_overbeek( $_, $node ) } @desc ];
452        return $node;
453    }
454    
455    
456    #===============================================================================
457  #  Extract tree structure values:  #  Extract tree structure values:
458  #===============================================================================  #===============================================================================
459  #  #
# Line 352  Line 474 
474  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
475    
476  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]
477  sub newick_lbl      { $_[0]->[1] }  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }
478  sub newick_x        { $_[0]->[2] }  sub newick_x        { $_[0]->[2] }
479  sub newick_c1       { $_[0]->[3] }  sub newick_c1       { $_[0]->[3] }
480  sub newick_c2       { $_[0]->[4] }  sub newick_c2       { $_[0]->[4] }
# Line 427  Line 549 
549  #===============================================================================  #===============================================================================
550  #  Some tree property tests:  #  Some tree property tests:
551  #===============================================================================  #===============================================================================
552    #  Tree is valid?
553    #
554    #  $bool = newick_is_valid( $node, $verbose )
555    #-------------------------------------------------------------------------------
556    sub newick_is_valid
557    {
558        my $node = shift;
559    
560        if ( ! array_ref( $node ) )
561        {
562            print STDERR "Node is not array reference\n" if $_[0];
563            return 0;
564        }
565    
566        my @node = @$node;
567        if ( ! @node )
568        {
569            print STDERR "Node is empty array reference\n" if $_[0];
570            return 0;
571        }
572    
573        # Must have descendant or label:
574    
575        if ( ! ( array_ref( $node[0] ) && @{ $node[0] } ) && ! $node[2] )
576        {
577            print STDERR "Node has neither descendant nor label\n" if $_[0];
578            return 0;
579        }
580    
581        #  If comments are present, they must be array references
582    
583        foreach ( ( @node > 3 ) ? @node[ 3 .. $#node ] : () )
584        {
585            if ( defined( $_ ) && ! array_ref( $_ ) )
586            {
587                print STDERR "Node has neither descendant or label\n" if $_[0];
588                return 0;
589            }
590        }
591    
592        #  Inspect the descendants:
593    
594        foreach ( array_ref( $node[0] ) ? @{ $node[0] } : () )
595        {
596            newick_is_valid( $_, @_ ) || return 0
597        }
598    
599        return 1;
600    }
601    
602    
603    #-------------------------------------------------------------------------------
604  #  Tree is rooted (2 branches at root node)?  #  Tree is rooted (2 branches at root node)?
605  #  #
606  #  $bool = newick_is_rooted( $node )  #  $bool = newick_is_rooted( $node )
# Line 512  Line 686 
686  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
687  #  List of tip nodes:  #  List of tip nodes:
688  #  #
689  #  @tips = newick_tip_ref_list( $node )  #  @tips = newick_tip_ref_list( $noderef )
690    # \@tips = newick_tip_ref_list( $noderef )
691  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
692  sub newick_tip_ref_list {  sub newick_tip_ref_list {
693      my ( $node, $not_root ) = @_;      my ( $node, $not_root ) = @_;
# Line 529  Line 704 
704          push @list, newick_tip_ref_list( $_, 1 );          push @list, newick_tip_ref_list( $_, 1 );
705      }      }
706    
707      @list;      wantarray ? @list : \@list;
708  }  }
709    
710    
# Line 537  Line 712 
712  #  List of tips:  #  List of tips:
713  #  #
714  #  @tips = newick_tip_list( $node )  #  @tips = newick_tip_list( $node )
715    # \@tips = newick_tip_list( $node )
716  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
717  sub newick_tip_list {  sub newick_tip_list {
718      map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );      my @tips = map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] );
719        wantarray ? @tips : \@tips;
720  }  }
721    
722    
# Line 578  Line 755 
755  #  List of duplicated tip labels.  #  List of duplicated tip labels.
756  #  #
757  #  @tips = newick_duplicated_tips( $node )  #  @tips = newick_duplicated_tips( $node )
758    # \@tips = newick_duplicated_tips( $node )
759  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
760  sub newick_duplicated_tips {  sub newick_duplicated_tips {
761      duplicates( newick_tip_list( $_[0] ) );      my @tips = &duplicates( newick_tip_list( $_[0] ) );
762        wantarray ? @tips : \@tips;
763  }  }
764    
765    
# Line 611  Line 790 
790  #  Tips shared between 2 trees.  #  Tips shared between 2 trees.
791  #  #
792  #  @tips = newick_shared_tips( $tree1, $tree2 )  #  @tips = newick_shared_tips( $tree1, $tree2 )
793    # \@tips = newick_shared_tips( $tree1, $tree2 )
794  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
795  sub newick_shared_tips {  sub newick_shared_tips {
796      my ( $Tree1, $Tree2 ) = @_;      my ( $tree1, $tree2 ) = @_;
797      my ( @Tips1 ) = newick_tip_list( $Tree1 );      my $tips1 = newick_tip_list( $tree1 );
798      my ( @Tips2 ) = newick_tip_list( $Tree2 );      my $tips2 = newick_tip_list( $tree2 );
799      intersection( \@Tips1, \@Tips2 );      my @tips = &intersection( $tips1, $tips2 );
800        wantarray ? @tips : \@tips;
801  }  }
802    
803    
# Line 638  Line 819 
819    
820    
821  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
822    #  Hash of tip nodes and corresponding distances from root:
823    #
824    #   %tip_distances = newick_tip_distances( $node )
825    #  \%tip_distances = newick_tip_distances( $node )
826    #-------------------------------------------------------------------------------
827    sub newick_tip_distances
828    {
829        my ( $node, $x, $hash ) = @_;
830        my $root = ! $hash;
831        ref( $hash ) eq 'HASH' or $hash = {};
832    
833        $x ||= 0;
834        $x  += newick_x( $node ) || 0;
835    
836        #  Is it a tip?
837    
838        my $n_desc = newick_n_desc( $node );
839        if ( ! $n_desc )
840        {
841            $hash->{ newick_lbl( $node ) } = $x;
842            return $hash;
843        }
844    
845        #  Tree rooted on tip?
846    
847        if ( ( $n_desc == 1 ) && $root && ( newick_lbl( $node ) ) )
848        {
849            $hash->{ newick_lbl( $node ) } = 0;  # Distance to root is zero
850        }
851    
852        foreach ( newick_desc_list( $node ) )
853        {
854            newick_tip_distances( $_, $x, $hash );
855        }
856    
857        wantarray ? %$hash : $hash;
858    }
859    
860    
861    #-------------------------------------------------------------------------------
862  #  Tree max X.  #  Tree max X.
863  #  #
864  #  $xmax = newick_max_X( $node )  #  $xmax = newick_max_X( $node )
# Line 712  Line 933 
933    
934    
935  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
936    #  Tree tip insertion point (with standard node labels):
937    #
938    #  [ $node1, $x1, $node2, $x2, $x ]
939    #           = newick_tip_insertion_point( $tree, $tip )
940    #
941    #  Which means: tip is on a branch of length x that is inserted into the branch
942    #  connecting node1 and node2, at distance x1 from node1 and x2 from node2.
943    #
944    #                x1    +------ n1a (lowest sorting tip of this subtree)
945    #            +--------n1
946    #            |         +------n1b (lowest sorting tip of this subtree)
947    #  tip-------n
948    #        x   |       +------------- n2a (lowest sorting tip of this subtree)
949    #            +------n2
950    #               x2   +-------- n2b (lowest sorting tip of this subtree)
951    #
952    #  The designations of 1 vs 2, and a vs b are chosen such that:
953    #     n1a < n1b, and n2a < n2b, and n1a < n2a
954    #
955    #  Then the statandard description becomes:
956    #
957    #  [ [ $n1a, min(n1b,n2a), max(n1b,n2a) ], x1,
958    #    [ $n2a, min(n2b,n1a), max(n2b,n1a) ], x2,
959    #    x
960    #  ]
961    #
962    #-------------------------------------------------------------------------------
963    sub newick_tip_insertion_point
964    {
965        my ( $tree, $tip ) = @_;
966        $tree && $tip && ref( $tree ) eq 'ARRAY'    or return undef;
967        $tree = copy_newick_tree( $tree )           or return undef;
968        $tree = reroot_newick_to_tip( $tree, $tip ) or return undef;
969        my $node = $tree;
970    
971        my $x  = 0;                        # Distance to node
972        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
973        $node  = $dl->[0];                 # Node adjacent to tip
974        $dl    = newick_desc_ref( $node );
975        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
976        {
977            $node = $dl->[0];
978            $x   += newick_x( $node );
979            $dl   = newick_desc_ref( $node );
980        }
981        $x += newick_x( $node );
982    
983        #  We are now at the node that is the insertion point.
984        #  Is it a tip?
985    
986        my @description;
987    
988        if ( ( ! $dl ) || @$dl == 0 )
989        {
990            @description = ( [ newick_lbl( $node ) ], 0, undef, 0, $x );
991        }
992    
993        #  Is it a trifurcation or greater, in which case it does not go
994        #  away with tip deletion?
995    
996        elsif ( @$dl > 2 )
997        {
998            @description = ( [ std_node_name( $node, $node ) ], 0, undef, 0, $x );
999        }
1000    
1001        #  The node is bifurcating.  We need to describe it.
1002    
1003        else
1004        {
1005            my ( $n1, $x1 ) = describe_descendant( $dl->[0] );
1006            my ( $n2, $x2 ) = describe_descendant( $dl->[1] );
1007    
1008            if ( @$n1 == 2 ) { push @$n1, $n2->[0] }
1009            if ( @$n2 == 2 )
1010            {
1011                @$n2 = sort { lc $a cmp lc $b } ( @$n2, $n1->[0] );
1012            }
1013            if ( @$n1 == 3 ) { @$n2 = sort { lc $a cmp lc $b } @$n2 }
1014            @description = ( $n1, $x1, $n2, $x2, $x );
1015        }
1016    
1017        return wantarray ? @description : \@description;
1018    }
1019    
1020    
1021    sub describe_descendant
1022    {
1023        my $node = shift;
1024    
1025        my $x  = 0;                        # Distance to node
1026        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
1027        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
1028        {
1029            $node = $dl->[0];
1030            $x   += newick_x( $node );
1031            $dl   = newick_desc_ref( $node );
1032        }
1033        $x += newick_x( $node );
1034    
1035        #  Is it a tip?  Return list of one tip;
1036    
1037        if ( ( ! $dl ) || @$dl == 0 )
1038        {
1039            return ( [ newick_lbl( $node ) ], $x );
1040        }
1041    
1042        #  Get tips of each descendent, keeping lowest sorting from each.
1043        #  Return the two lowest of those (the third will come from the
1044        #  other side of the original node).
1045    
1046        my @rep_tips = sort { lc $a cmp lc $b }
1047                       map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }
1048                       @$dl;
1049        return ( [ @rep_tips[0,1] ], $x );
1050    }
1051    
1052    
1053    #-------------------------------------------------------------------------------
1054  #  Standard node name:  #  Standard node name:
1055  #     Tip label if at a tip  #     Tip label if at a tip
1056  #     Three sorted tip labels intersecting at node, each being smallest  #     Three sorted tip labels intersecting at node, each being smallest
1057  #           of all the tips of their subtrees  #           of all the tips of their subtrees
1058  #  #
1059  #  @TipOrTips = std_node_name( $Tree, $Node )  #  @TipOrTips = std_node_name( $tree, $node )
1060  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1061  sub std_node_name {  sub std_node_name {
1062      my $tree = $_[0];      my $tree = $_[0];
# Line 734  Line 1073 
1073      #  Work through lists of tips in descendant subtrees, removing them from      #  Work through lists of tips in descendant subtrees, removing them from
1074      #  @rest, and keeping the best tip for each subtree.      #  @rest, and keeping the best tip for each subtree.
1075    
1076      my @rest = tips_in_newick( $tree );      my @rest = newick_tip_list( $tree );
1077      my @best = map {      my @best = map
1078              my @tips = sort { lc $a cmp lc $b } tips_in_newick( $_ );            {
1079              @rest = set_difference( \@rest, \@tips );              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );
1080                @rest = &set_difference( \@rest, \@tips );
1081              $tips[0];              $tips[0];
1082          } newick_desc_list( $noderef );          } newick_desc_list( $noderef );
1083    
# Line 856  Line 1196 
1196      @p2 && @p3 || return ();                             #  Were they found?      @p2 && @p3 || return ();                             #  Were they found?
1197    
1198      # Find the common prefix for each pair of paths      # Find the common prefix for each pair of paths
1199      my @p12 = common_prefix( \@p1, \@p2 );      my @p12 = &common_prefix( \@p1, \@p2 );
1200      my @p13 = common_prefix( \@p1, \@p3 );      my @p13 = &common_prefix( \@p1, \@p3 );
1201      my @p23 = common_prefix( \@p2, \@p3 );      my @p23 = &common_prefix( \@p2, \@p3 );
1202    
1203      # Return the longest common prefix of any two paths      # Return the longest common prefix of any two paths
1204      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :
# Line 909  Line 1249 
1249      @p1 && @p2 || return undef;                          # Were they found?      @p1 && @p2 || return undef;                          # Were they found?
1250    
1251      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1252      my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1253      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1254      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1255    
# Line 930  Line 1270 
1270    
1271      array_ref( $node ) && defined( $node1 )      array_ref( $node ) && defined( $node1 )
1272                         && defined( $node2 ) || return undef;                         && defined( $node2 ) || return undef;
1273      my @p1 = path_to_node( $node, $node1 );      my @p1 = path_to_node( $node, $node1 ) or return undef;
1274      my @p2 = path_to_node( $node, $node2 );      my @p2 = path_to_node( $node, $node2 ) or return undef;
     @p1 && @p2 || return undef;                          # Were they found?  
1275    
1276      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1277      my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost
1278      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1279      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1280    
# Line 1150  Line 1489 
1489      my ( $node, $x, $not_root ) = @_;      my ( $node, $x, $not_root ) = @_;
1490    
1491      my $n = 0;      my $n = 0;
1492      if ( $not_root ) {      if ( $not_root )
1493        {
1494          set_newick_x( $node, $x );          set_newick_x( $node, $x );
1495          $n++;          $n++;
1496      }      }
1497    
1498      foreach ( newick_desc_list( $node ) ) {      foreach ( newick_desc_list( $node ) )
1499        {
1500          $n += newick_set_all_branches( $_, $x, 1 );          $n += newick_set_all_branches( $_, $x, 1 );
1501      }      }
1502    
# Line 1164  Line 1505 
1505    
1506    
1507  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1508    #  Rescale all branch lenghts by factor.
1509    #
1510    #  $node = newick_rescale_branches( $node, $factor )
1511    #-------------------------------------------------------------------------------
1512    sub newick_rescale_branches {
1513        my ( $node, $factor ) = @_;
1514    
1515        my $x = newick_x( $node );
1516        set_newick_x( $node, $factor * $x ) if $x;
1517    
1518        foreach ( newick_desc_list( $node ) )
1519        {
1520            newick_rescale_branches( $_, $factor );
1521        }
1522    
1523        $node;
1524    }
1525    
1526    
1527    #-------------------------------------------------------------------------------
1528  #  Set negative branches to zero.  The original tree is modfied.  #  Set negative branches to zero.  The original tree is modfied.
1529  #  #
1530  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
# Line 1189  Line 1550 
1550    
1551    
1552  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1553    #  Remove comments from a newick tree (e.g., before writing for phylip).
1554    #
1555    #  $node = newick_strip_comments( $node )
1556    #-------------------------------------------------------------------------------
1557    sub newick_strip_comments {
1558        my ( $node ) = @_;
1559    
1560        @$node = @$node[ 0 .. 2 ];
1561        foreach ( newick_desc_list( $node ) ) { newick_strip_comments( $_ ) }
1562        $node;
1563    }
1564    
1565    
1566    #-------------------------------------------------------------------------------
1567  #  Normalize tree order (in place).  #  Normalize tree order (in place).
1568  #  #
1569  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )
# Line 1336  Line 1711 
1711  #  #
1712  #  $tree = unaesthetic_newick_tree( $treeref, $dir )  #  $tree = unaesthetic_newick_tree( $treeref, $dir )
1713  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1714  sub unaesthetic_newick_tree {  sub unaesthetic_newick_tree
1715    {
1716      my ( $tree, $dir ) = @_;      my ( $tree, $dir ) = @_;
1717      my %cnt;      my %cnt;
1718    
# Line 1356  Line 1732 
1732  #           = 0 for no change, and  #           = 0 for no change, and
1733  #           > 0 for downward branch (small group first).  #           > 0 for downward branch (small group first).
1734  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1735  sub reorder_against_tip_count {  sub reorder_against_tip_count
1736    {
1737      my ( $node, $cntref, $dir ) = @_;      my ( $node, $cntref, $dir ) = @_;
1738    
1739      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1395  Line 1772 
1772  #  #
1773  #  $tree = random_order_newick_tree( $tree )  #  $tree = random_order_newick_tree( $tree )
1774  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1775  sub random_order_newick_tree {  sub random_order_newick_tree
1776    {
1777      my ( $node ) = @_;      my ( $node ) = @_;
1778    
1779      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
# Line 1404  Line 1782 
1782      #  Reorder this subtree:      #  Reorder this subtree:
1783    
1784      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1785      @$dl_ref = random_order( @$dl_ref );      @$dl_ref = &random_order( @$dl_ref );
1786    
1787      #  Reorder descendants:      #  Reorder descendants:
1788    
# Line 1419  Line 1797 
1797  #  #
1798  #  $newtree = reroot_newick_by_path( @path )  #  $newtree = reroot_newick_by_path( @path )
1799  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1800  sub reroot_newick_by_path {  sub reroot_newick_by_path
1801    {
1802      my ( $node1, $path1, @rest ) = @_;      my ( $node1, $path1, @rest ) = @_;
1803      array_ref( $node1 ) || return undef;      #  Always expect a node      array_ref( $node1 ) || return undef;      #  Always expect a node
1804    
# Line 1432  Line 1811 
1811      #      #
1812      #      splice( @$dl1, $path1-1, 1 );      #      splice( @$dl1, $path1-1, 1 );
1813      #      #
1814      #  But this maintains the cyclic order of the nodes:      #  But the following maintains the cyclic order of the nodes:
1815    
1816      my $dl1 = newick_desc_ref( $node1 );      my $dl1 = newick_desc_ref( $node1 );
1817      my $nd1 = @$dl1;      my $nd1 = @$dl1;
# Line 1447  Line 1826 
1826    
1827      my $dl2 = newick_desc_ref( $node2 );      my $dl2 = newick_desc_ref( $node2 );
1828      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }
1829      else                     { set_newick_desc_list( $node2, [ $node1 ] ) }      else                     { set_newick_desc_list( $node2, $node1 ) }
1830    
1831      #  Move c1 comments from node 1 to node 2:      #  Move c1 comments from node 1 to node 2:
1832    
# Line 1527  Line 1906 
1906    
1907    
1908  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1909    #  Reroot a newick tree along the path between 2 nodes:
1910    #
1911    #  $tree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction )
1912    #-------------------------------------------------------------------------------
1913    sub reroot_newick_between_nodes
1914    {
1915        my ( $tree, $node1, $node2, $fraction ) = @_;
1916        array_ref( $tree ) or return undef;
1917        $fraction >= 0 && $fraction <= 1 or return undef;
1918    
1919        #  Find the paths to the nodes:
1920    
1921        my @path1 = path_to_node( $tree, $node1 ) or return undef;
1922        my @path2 = path_to_node( $tree, $node2 ) or return undef;
1923    
1924        #  Trim the common prefix, saving it:
1925    
1926        my @prefix = ();
1927        while ( $path1[1] == $path2[1] )
1928        {
1929            push @prefix, splice( @path1, 0, 2 );
1930            splice( @path2, 0, 2 );
1931        }
1932    
1933        my ( @path, $dist );
1934        if    ( @path1 < 3 )
1935        {
1936            @path2 >= 3 or return undef;              # node1 = node2
1937            $dist = $fraction * newick_path_length( @path2 );
1938            @path = @path2;
1939        }
1940        elsif ( @path2 < 3 )
1941        {
1942            $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
1943            @path = @path1;
1944        }
1945        else
1946        {
1947            my $dist1 = newick_path_length( @path1 );
1948            my $dist2 = newick_path_length( @path2 );
1949            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
1950            @path = ( $dist <= 0 ) ? @path1 : @path2;
1951            $dist = abs( $dist );
1952        }
1953    
1954        #  Descend tree until we reach the insertion branch:
1955    
1956        my $x;
1957        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
1958        {
1959            $dist -= $x;
1960            push @prefix, splice( @path, 0, 2 );
1961        }
1962    
1963        #  Insert the new node:
1964    
1965        my $newnode = [ [ $path[2] ], undef, $dist ];
1966        set_newick_desc_i( $path[0], $path[1], $newnode );
1967        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
1968    
1969        #  We can now build the path from root to the new node
1970    
1971        reroot_newick_by_path( @prefix, @path[0,1], $newnode );
1972    }
1973    
1974    
1975    #-------------------------------------------------------------------------------
1976  #  Move root of tree to an approximate midpoint.  #  Move root of tree to an approximate midpoint.
1977  #  #
1978  #  $newtree = reroot_newick_to_approx_midpoint( $tree )  #  $newtree = reroot_newick_to_approx_midpoint( $tree )
# Line 1859  Line 2305 
2305      ( $tree, ( $collapse ? @new_desc : () ) );      ( $tree, ( $collapse ? @new_desc : () ) );
2306  }  }
2307    
2308    #-------------------------------------------------------------------------------
2309    #  Add a subtree to a newick tree node:
2310    #
2311    #  $node = newick_insert_at_node( $node, $subtree )
2312    #-------------------------------------------------------------------------------
2313    sub newick_insert_at_node
2314    {
2315        my ( $node, $subtree ) = @_;
2316        array_ref( $node ) && array_ref( $subtree ) or return undef;
2317    
2318        #  We could check validity of trees, but ....
2319    
2320        my $dl = newick_desc_ref( $node );
2321        if ( array_ref( $dl ) )
2322        {
2323            push @$dl, $subtree;
2324        }
2325        else
2326        {
2327            set_newick_desc_ref( $node, [ $subtree ] );
2328        }
2329        return $node;
2330    }
2331    
2332    
2333    #-------------------------------------------------------------------------------
2334    #  Insert a subtree into a newick tree along the path between 2 nodes:
2335    #
2336    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
2337    #-------------------------------------------------------------------------------
2338    sub newick_insert_between_nodes
2339    {
2340        my ( $tree, $subtree, $node1, $node2, $fraction ) = @_;
2341        array_ref( $tree ) && array_ref( $subtree ) or return undef;
2342        $fraction >= 0 && $fraction <= 1 or return undef;
2343    
2344        #  Find the paths to the nodes:
2345    
2346        my @path1 = path_to_node( $tree, $node1 ) or return undef;
2347        my @path2 = path_to_node( $tree, $node2 ) or return undef;
2348    
2349        #  Trim the common prefix:
2350    
2351        while ( $path1[1] == $path2[1] )
2352        {
2353            splice( @path1, 0, 2 );
2354            splice( @path2, 0, 2 );
2355        }
2356    
2357        my ( @path, $dist );
2358        if    ( @path1 < 3 )
2359        {
2360            @path2 >= 3 or return undef;              # node1 = node2
2361            $dist = $fraction * newick_path_length( @path2 );
2362            @path = @path2;
2363        }
2364        elsif ( @path2 < 3 )
2365        {
2366            $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
2367            @path = @path1;
2368        }
2369        else
2370        {
2371            my $dist1 = newick_path_length( @path1 );
2372            my $dist2 = newick_path_length( @path2 );
2373            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2374            @path = ( $dist <= 0 ) ? @path1 : @path2;
2375            $dist = abs( $dist );
2376        }
2377    
2378        #  Descend tree until we reach the insertion branch:
2379    
2380        my $x;
2381        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
2382        {
2383            $dist -= $x;
2384            splice( @path, 0, 2 );
2385        }
2386    
2387        #  Insert the new node:
2388    
2389        set_newick_desc_i( $path[0], $path[1], [ [ $path[2], $subtree ], undef, $dist ] );
2390        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
2391    
2392        return $tree;
2393    }
2394    
2395    
2396  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2397  #  Prune one or more tips from a tree:  #  Prune one or more tips from a tree:
# Line 2016  Line 2549 
2549    
2550  #===============================================================================  #===============================================================================
2551  #  #
2552    #  Representative subtrees
2553    #
2554    #===============================================================================
2555    #  Find subtree of size n representating vicinity of the root:
2556    #
2557    #   $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority )
2558    #   $subtree = root_neighborhood_representative_tree( $tree, $n )
2559    #
2560    #  Note that if $tree is rooted, then the subtree will also be.  This can have
2561    #  consequences on downstream programs.
2562    #-------------------------------------------------------------------------------
2563    sub root_neighborhood_representative_tree
2564    {
2565        my ( $tree, $n, $tip_priority ) = @_;
2566        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2567        if ( newick_tip_count( $tree ) <= $n ) { return $tree }
2568    
2569        $tip_priority ||= default_tip_priority( $tree );
2570        my @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2571                   root_proximal_newick_subtrees( $tree, $n );
2572    
2573        newick_subtree( copy_newick_tree( $tree ), \@tips );
2574    }
2575    
2576    
2577    #-------------------------------------------------------------------------------
2578    #  Find n tips to represent tree lineages in vicinity of another tip.
2579    #  Default tip priority is short total branch length.
2580    #
2581    #  \@tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2582    #   @tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority )
2583    #  \@tips = root_neighborhood_representative_tips( $tree, $n )
2584    #   @tips = root_neighborhood_representative_tips( $tree, $n )
2585    #-------------------------------------------------------------------------------
2586    sub root_neighborhood_representative_tips
2587    {
2588        my ( $tree, $n, $tip_priority ) = @_;
2589        array_ref( $tree ) && ( $n >= 2 ) or return undef;
2590    
2591        my @tips;
2592        if ( newick_tip_count( $tree ) <= $n )
2593        {
2594            @tips = newick_tip_list( $tree );
2595        }
2596        else
2597        {
2598            $tip_priority ||= default_tip_priority( $tree );
2599            @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) }
2600                    root_proximal_newick_subtrees( $tree, $n );
2601        }
2602    
2603        wantarray ? @tips : \@tips;
2604    }
2605    
2606    
2607    #-------------------------------------------------------------------------------
2608    #  Find subtree of size n representating vicinity of a tip:
2609    #
2610    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority )
2611    #   $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n )
2612    #-------------------------------------------------------------------------------
2613    sub tip_neighborhood_representative_tree
2614    {
2615        my ( $tree, $tip, $n, $tip_priority ) = @_;
2616        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2617        newick_tip_in_tree( $tree, $tip ) or return undef;
2618    
2619        my $tree1 = copy_newick_tree( $tree );
2620        if ( newick_tip_count( $tree1 ) - 1 <= $n )
2621        {
2622            return prune_from_newick( $tree1, $tip )
2623        }
2624    
2625        $tree1 = reroot_newick_to_tip( $tree1, $tip );
2626        $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2627        my @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2628        newick_subtree( copy_newick_tree( $tree ), \@tips );
2629    }
2630    
2631    
2632    #-------------------------------------------------------------------------------
2633    #  Find n tips to represent tree lineages in vicinity of another tip.
2634    #  Default tip priority is short total branch length.
2635    #
2636    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2637    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority )
2638    #  \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2639    #   @tips = tip_neighborhood_representative_tips( $tree, $tip, $n )
2640    #-------------------------------------------------------------------------------
2641    sub tip_neighborhood_representative_tips
2642    {
2643        my ( $tree, $tip, $n, $tip_priority ) = @_;
2644        array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef;
2645        newick_tip_in_tree( $tree, $tip ) or return undef;
2646    
2647        my @tips = newick_tip_list( $tree );
2648        if ( newick_tip_count( $tree ) - 1 <= $n )
2649        {
2650            @tips = grep { $_ ne $tip } @tips;
2651        }
2652        else
2653        {
2654            my $tree1 = copy_newick_tree( $tree );
2655            $tree1 = reroot_newick_to_tip( $tree1, $tip );
2656            $tree1 = newick_desc_i( $tree1, 1 );        # Node immediately below tip
2657            @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority );
2658        }
2659    
2660        wantarray ? @tips : \@tips;
2661    }
2662    
2663    
2664    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2665    #  Anonymous hash of the negative distance from root to each tip:
2666    #
2667    #   \%tip_priority = default_tip_priority( $tree )
2668    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2669    sub default_tip_priority
2670    {
2671        my ( $tree ) = @_;
2672        my $tip_distances = newick_tip_distances( $tree ) || {};
2673        return { map { $_ => -$tip_distances->{$_} } keys %$tip_distances };
2674    }
2675    
2676    
2677    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2678    #  Select a tip from a subtree base on a priority value:
2679    #
2680    #    $tip = representative_tip_of_newick_node( $node, \%tip_priority )
2681    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2682    sub representative_tip_of_newick_node
2683    {
2684        my ( $node, $tip_priority ) = @_;
2685        my ( $tip ) = sort { $b->[1] <=> $a->[1] }   # The best
2686                      map  { [ $_, $tip_priority->{ $_ } ] }
2687                      newick_tip_list( $node );
2688        $tip->[0];                                   # Label from label-priority pair
2689    }
2690    
2691    
2692    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2693    #  Find n subtrees focused around the root of a tree.  Typically each will
2694    #  then be reduced to a single tip to make a representative tree:
2695    #
2696    #   @subtrees = root_proximal_newick_subtrees( $tree, $n )
2697    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2698    sub root_proximal_newick_subtrees
2699    {
2700        my ( $tree, $n ) = @_;
2701        my $node_start_end = newick_branch_intervals( $tree );
2702        n_representative_branches( $n, $node_start_end );
2703    }
2704    
2705    
2706    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2707    #   @node_start_end = newick_branch_intervals( $tree )
2708    #  \@node_start_end = newick_branch_intervals( $tree )
2709    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2710    sub newick_branch_intervals
2711    {
2712        my ( $node, $parent_x ) = @_;
2713        $parent_x ||= 0;
2714        my ( $desc, undef, $dx ) = @$node;
2715        my $x = $parent_x + $dx;
2716        my $interval = [ $node, $parent_x, $desc && @$desc ? $x : 1e100 ];
2717        my @intervals = ( $interval,
2718                          map { &newick_branch_intervals( $_, $x ) } @$desc
2719                        );
2720        return wantarray ? @intervals : \@intervals;
2721    }
2722    
2723    
2724    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2725    #   @ids = n_representative_branches( $n,  @id_start_end )
2726    #   @ids = n_representative_branches( $n, \@id_start_end )
2727    #  \@ids = n_representative_branches( $n,  @id_start_end )
2728    #  \@ids = n_representative_branches( $n, \@id_start_end )
2729    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2730    sub n_representative_branches
2731    {
2732        my $n = shift;
2733        #  Sort intervals by start point:
2734        my @unprocessed = sort { $a->[1] <=> $b->[1] }
2735                          ( @_ == 1 ) ? @{ $_[0] } : @_;
2736        my @active = ();
2737        my ( $interval, $current_point );
2738        foreach $interval ( @unprocessed )
2739        {
2740            $current_point = $interval->[1];
2741            #  Filter out intervals that have ended.  This is N**2 in the number
2742            #  of representatives.  Fixing this would require maintaining a sorted
2743            #  active list.
2744            @active = grep { $_->[2] > $current_point } @active;
2745            push @active, $interval;
2746            last if ( @active >= $n );
2747        }
2748    
2749        my @ids = map { $_->[0] } @active;
2750        return wantarray() ? @ids : \@ids;
2751    }
2752    
2753    
2754    #===============================================================================
2755    #
2756  #  Tree writing and reading  #  Tree writing and reading
2757  #  #
2758  #===============================================================================  #===============================================================================
2759  #  writeNewickTree( $tree [, $file ] )  #  writeNewickTree( $tree )
2760    #  writeNewickTree( $tree, $file )
2761    #  writeNewickTree( $tree, \*FH )
2762  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2763  sub writeNewickTree {  sub writeNewickTree {
2764      my ( $tree, $file ) = @_;      my ( $tree, $file ) = @_;
2765      $file || ( $file = \*STDOUT );      my ( $fh, $close ) = open_output( $file );
2766      print  $file  ( strNewickTree( $tree ), "\n" );      $fh or return;
2767        print  $fh  ( strNewickTree( $tree ), "\n" );
2768        close $fh if $close;
2769  }  }
2770    
2771    
# Line 2167  Line 2908 
2908    
2909    
2910  #===============================================================================  #===============================================================================
2911    #  $tree  = read_newick_tree( $file )  # reads to a semicolon
2912    #  @trees = read_newick_trees( $file ) # reads to end of file
2913    #===============================================================================
2914    
2915    sub read_newick_tree
2916    {
2917        my $file = shift;
2918        my ( $fh, $close ) = open_input( $file );
2919        my $tree;
2920        my @lines = ();
2921        foreach ( <$fh> )
2922        {
2923            chomp;
2924            push @lines, $_;
2925            if ( /;/ )
2926            {
2927                $tree = parse_newick_tree_str( join( ' ', @lines ) );
2928                last;
2929            }
2930        }
2931        close $fh if $close;
2932    
2933        $tree;
2934    }
2935    
2936    
2937    sub read_newick_trees
2938    {
2939        my $file = shift;
2940        my ( $fh, $close ) = open_input( $file );
2941        my @trees = ();
2942        my @lines = ();
2943        foreach ( <$fh> )
2944        {
2945            chomp;
2946            push @lines, $_;
2947            if ( /;/ )
2948            {
2949                push @trees, parse_newick_tree_str( join( ' ', @lines ) );
2950                @lines = ()
2951            }
2952        }
2953        close $fh if $close;
2954    
2955        @trees;
2956    }
2957    
2958    
2959    #===============================================================================
2960  #  Tree reader adapted from the C language reader in fastDNAml  #  Tree reader adapted from the C language reader in fastDNAml
2961  #  #
2962  #  $tree = parse_newick_tree_str( $string )  #  $tree = parse_newick_tree_str( $string )
# Line 2378  Line 3168 
3168  sub printer_plot_newick {  sub printer_plot_newick {
3169      my ( $node, $file, $width, $min_dx, $dy ) = @_;      my ( $node, $file, $width, $min_dx, $dy ) = @_;
3170    
3171      my ( $fh, $close );      my ( $fh, $close ) = open_output( $file );
3172      if ( ! defined( $file ) ) {      $fh or return;
         $fh = \*STDOUT;  
     }  
     elsif ( $file =~ /^\*/ ) {  
         $fh = $file;  
     }  
     else {  
         open $fh, ">$file" or die "Could not open $file for writing printer_plot_newick\n";  
         $close = 1;  
     }  
3173    
3174      print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";      print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";
3175      if ( $close ) { close $fh }      if ( $close ) { close $fh }
# Line 2555  Line 3336 
3336  }  }
3337    
3338    
3339    #===============================================================================
3340    #  Open an input file stream:
3341    #
3342    #     ( $handle, undef ) = open_input(       );  # \*STDIN
3343    #     ( $handle, undef ) = open_input( \*FH  );
3344    #     ( $handle, 1     ) = open_input( $file );  # need to close $handle
3345    #
3346    #===============================================================================
3347    sub open_input
3348    {
3349        my $file = shift;
3350        my $fh;
3351        if    ( ! defined( $file ) )     { return ( \*STDIN ) }
3352        elsif ( ref( $file ) eq 'GLOB' ) { return ( $file   ) }
3353        elsif ( open( $fh, "<$file" ) )  { return ( $fh, 1  ) } # Need to close
3354    
3355        print STDERR "gjonewick::open_input could not open '$file' for reading\n";
3356        return undef;
3357    }
3358    
3359    
3360    #===============================================================================
3361    #  Open an output file stream:
3362    #
3363    #     ( $handle, undef ) = open_output(      );  # \*STDOUT
3364    #     ( $handle, undef ) = open_output( \*FH );
3365    #     ( $handle, 1     ) = open_output( $file ); # need to close $handle
3366    #
3367    #===============================================================================
3368    sub open_output
3369    {
3370        my $file = shift;
3371        my $fh;
3372        if    ( ! defined( $file ) )     { return ( \*STDOUT ) }
3373        elsif ( ref( $file ) eq 'GLOB' ) { return ( $file    ) }
3374        elsif ( ( open $fh, ">$file" ) ) { return ( $fh, 1   ) } # Need to close
3375    
3376        print STDERR "gjonewick::open_output could not open '$file' for writing\n";
3377        return undef;
3378    }
3379    
3380    
3381    #===============================================================================
3382    #  Some subroutines copied from gjolists
3383    #===============================================================================
3384    #  Return the common prefix of two lists:
3385    #
3386    #  @common = common_prefix( \@list1, \@list2 )
3387    #-----------------------------------------------------------------------------
3388    sub common_prefix
3389    {
3390        my ($l1, $l2) = @_;
3391        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
3392        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
3393    
3394        my $i = 0;
3395        my $l1_i;
3396        while ( defined( $l1_i = $l1->[$i] ) && $l1_i eq $l2->[$i] ) { $i++ }
3397    
3398        return @$l1[ 0 .. ($i-1) ];  # perl handles negative range
3399    }
3400    
3401    
3402    #-----------------------------------------------------------------------------
3403    #  Return the unique suffixes of each of two lists:
3404    #
3405    #  ( \@suffix1, \@suffix2 ) = unique_suffixes( \@list1, \@list2 )
3406    #-----------------------------------------------------------------------------
3407    sub unique_suffixes
3408    {
3409        my ($l1, $l2) = @_;
3410        ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n";
3411        ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n";
3412    
3413        my $i = 0;
3414        my @l1 = @$l1;
3415        my @l2 = @$l2;
3416        my $l1_i;
3417        while ( defined( $l1_i = $l1[$i] ) && $l1_i eq $l2[$i] ) { $i++ }
3418    
3419        splice @l1, 0, $i;
3420        splice @l2, 0, $i;
3421        return ( \@l1, \@l2 );
3422    }
3423    
3424    
3425    #-------------------------------------------------------------------------------
3426    #  List of values duplicated in a list (stable in order by second occurance):
3427    #
3428    #  @dups = duplicates( @list )
3429    #-------------------------------------------------------------------------------
3430    sub duplicates
3431    {
3432        my %cnt = ();
3433        grep { ++$cnt{$_} == 2 } @_;
3434    }
3435    
3436    
3437    #-------------------------------------------------------------------------------
3438    #  Randomize the order of a list:
3439    #
3440    #  @random = random_order( @list )
3441    #-------------------------------------------------------------------------------
3442    sub random_order
3443    {
3444        my ( $i, $j );
3445        for ( $i = @_ - 1; $i > 0; $i-- )
3446        {
3447            $j = int( ($i+1) * rand() );
3448            ( $_[$i], $_[$j] ) = ( $_[$j], $_[$i] ); # Interchange i and j
3449        }
3450    
3451       @_;
3452    }
3453    
3454    
3455    #-----------------------------------------------------------------------------
3456    #  Intersection of two or more sets:
3457    #
3458    #  @intersection = intersection( \@set1, \@set2, ... )
3459    #-----------------------------------------------------------------------------
3460    sub intersection
3461    {
3462        my $set = shift;
3463        my @intersection = @$set;
3464    
3465        foreach $set ( @_ )
3466        {
3467            my %set = map { $_ => 1 } @$set;
3468            @intersection = grep { exists $set{ $_ } } @intersection;
3469        }
3470    
3471        @intersection;
3472    }
3473    
3474    
3475    #-----------------------------------------------------------------------------
3476    #  Elements in set 1, but not set 2:
3477    #
3478    #  @difference = set_difference( \@set1, \@set2 )
3479    #-----------------------------------------------------------------------------
3480    sub set_difference
3481    {
3482        my ($set1, $set2) = @_;
3483        my %set2 = map { $_ => 1 } @$set2;
3484        grep { ! ( exists $set2{$_} ) } @$set1;
3485    }
3486    
3487    
3488  1;  1;

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.9

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3