[Bio] / FigKernelPackages / gjonewicklib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjonewicklib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5, Tue Dec 19 17:32:48 2006 UTC revision 1.8, Sun Feb 11 18:25:48 2007 UTC
# Line 1  Line 1 
1  #  #
2  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2007 University of Chicago and Fellowship
3  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
4  #  #
5  # This file is part of the SEED Toolkit.  # This file is part of the SEED Toolkit.
# Line 44  Line 44 
44  #  #
45  #     $tree = \@rootnode;  #     $tree = \@rootnode;
46  #  #
47  #     @node = ( \@desc,  #  reference to list of descendants  #     $node = [ \@desc,  #  reference to list of descendants
48  #                $label, #  node label  #                $label, #  node label
49  #                $x,     #  branch length  #                $x,     #  branch length
50  #               \@c1,    #  reference to comment list 1  #               \@c1,    #  reference to comment list 1
# Line 52  Line 52 
52  #               \@c3,    #  reference to comment list 3  #               \@c3,    #  reference to comment list 3
53  #               \@c4,    #  reference to comment list 4  #               \@c4,    #  reference to comment list 4
54  #               \@c5     #  reference to comment list 5  #               \@c5     #  reference to comment list 5
55  #             )  #             ]
56  #  #
57  #  At present, no routine tests or enforces the length of the list (a single  #  At present, no routine tests or enforces the length of the list (a single
58  #  element list could be a valid internal node).  #  element list could be a valid internal node).
# Line 63  Line 63 
63  #  time, but is different from the prolog representation.  #  time, but is different from the prolog representation.
64  #  #
65  #  #
66    #  Ross Overbeek has a different tree node structure:
67    #
68    #     $node = [ Label,
69    #               DistanceToParent,
70    #               [ ParentPointer, ChildPointer1, ... ],
71    #               [ Name1\tVal1, Name2\tVal2, ... ]
72    #             ]
73    #
74    #  So:
75    #
76    #===============================================================================
77    #  Tree format interconversion:
78    #===============================================================================
79    #
80    #  $gjonewick = overbeek_to_gjonewick( $overbeek )
81    #  $overbeek  = gjonewick_to_overbeek( $gjonewick )
82    #
83  #===============================================================================  #===============================================================================
84  #  Tree data extraction:  #  Tree data extraction:
85  #===============================================================================  #===============================================================================
# Line 92  Line 109 
109  #  set_newick_desc_list( $noderef, @desclist )  #  set_newick_desc_list( $noderef, @desclist )
110  #  set_newick_desc_i( $noderef1, $i, $noderef2)  #  set_newick_desc_i( $noderef1, $i, $noderef2)
111  #  #
112    #  $bool    = newick_is_valid( $noderef )       # verify that tree is valid
113    #
114  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root  #  $bool    = newick_is_rooted( $noderef )      # 2 branches from root
115  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root  #  $bool    = newick_is_unrooted( $noderef )    # 3 or more branches from root
116  #  $bool    = newick_is_tip_rooted( $noderef )  # 1 branch from root  #  $bool    = newick_is_tip_rooted( $noderef )  # 1 branch from root
# Line 111  Line 130 
130  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )
131  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )  #  ( $tipname, $xmax ) = newick_most_distant_tip( $noderef )
132  #  #
133    #  Tree tip insertion point (tip is on branch of length x that
134    #  is inserted into branch connecting node1 and node2, a distance
135    #  x1 from node1 and x2 from node2):
136    #
137    #  [ $node1, $x1, $node2, $x2, $x ]
138    #           = newick_tip_insertion_point( $tree, $tip )
139    #
140    #  Standardized label for a node in terms of intersection of 3 lowest sorting
141    #  tips (sort is lower case):
142    #
143    #  @TipOrTips = std_node_name( $Tree, $Node )
144    #
145  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
146  #  Paths from root of tree:  #  Paths from root of tree:
147  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 140  Line 171 
171  #  $treecopy = copy_newick_tree( $tree )  #  $treecopy = copy_newick_tree( $tree )
172  #  #
173  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
174  #  The following modify the existing tree, and passibly any components of that  #  The following modify the existing tree, and possibly any components of that
175  #  tree that are reached by reference.  If the old version is still needed, copy  #  tree that are reached by reference.  If the old version is still needed, copy
176  #  before modifying.  #  before modifying.
177  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
# Line 157  Line 188 
188  #  $n_changed = newick_set_undefined_branches( $node, $x )  #  $n_changed = newick_set_undefined_branches( $node, $x )
189  #  $n_changed = newick_set_all_branches( $node, $x )  #  $n_changed = newick_set_all_branches( $node, $x )
190  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
191    #  $node      = newick_rescale_branches( $node, $factor )
192    #
193    #  Modify comments:
194    #
195    #  $node = newick_strip_comments( $node )
196  #  #
197  #  Modify rooting and/or order:  #  Modify rooting and/or order:
198  #  #
# Line 182  Line 218 
218  #  #
219  #  $newtree = collapse_zero_length_branches( $tree )  #  $newtree = collapse_zero_length_branches( $tree )
220  #  #
221    #  $node = newick_insert_at_node( $node, $subtree )
222    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
223    #
224  #===============================================================================  #===============================================================================
225  #  Tree reading and writing:  #  Tree reading and writing:
226  #===============================================================================  #===============================================================================
227  #  #
228  #   writeNewickTree( $tree )  #   writeNewickTree( $tree )
229  #   writeNewickTree( $tree, $file )  #   writeNewickTree( $tree, $file )
230  #   writePrettyTree( $tree, $file )  #   writeNewickTree( $tree, \*FH )
231  #  fwriteNewickTree( $file, $tree )  #  fwriteNewickTree( $file, $tree )  # Matches the C arg list for f... I/O
232  #  $treestring = swriteNewickTree( $tree )  #  $treestring = swriteNewickTree( $tree )
233  #  $treestring = formatNewickTree( $tree )  #  $treestring = formatNewickTree( $tree )
234  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )  #  @textlines  = text_plot_newick( $node, $width, $min_dx, $dy )
235  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )  #   printer_plot_newick( $node, $file, $width, $min_dx, $dy )
236  #  #
237    #  $tree  = read_newick_tree( $file )  # reads to a semicolon
238    #  @trees = read_newick_trees( $file ) # reads to end of file
239  #  $tree = parse_newick_tree_str( $string )  #  $tree = parse_newick_tree_str( $string )
240  #  #
241  #===============================================================================  #===============================================================================
242    
243    
244    use Carp;
245    use Data::Dumper;
246    
247  require Exporter;  require Exporter;
248    
249  our @ISA = qw(Exporter);  our @ISA = qw(Exporter);
250  our @EXPORT = qw(  our @EXPORT = qw(
251            overbeek_to_gjonewick
252            gjonewick_to_overbeek
253    
254            newick_is_valid
255          newick_is_rooted          newick_is_rooted
256          newick_is_unrooted          newick_is_unrooted
257          tree_rooted_on_tip          tree_rooted_on_tip
# Line 241  Line 289 
289          newick_set_undefined_branches          newick_set_undefined_branches
290          newick_set_all_branches          newick_set_all_branches
291          newick_fix_negative_branches          newick_fix_negative_branches
292            newick_rescale_branches
293    
294            newick_strip_comments
295    
296          normalize_newick_tree          normalize_newick_tree
297          reverse_newick_tree          reverse_newick_tree
# Line 263  Line 314 
314          newick_subtree          newick_subtree
315          collapse_zero_length_branches          collapse_zero_length_branches
316    
317            newick_insert_at_node
318            newick_insert_between_nodes
319    
320          writeNewickTree          writeNewickTree
321          fwriteNewickTree          fwriteNewickTree
322          strNewickTree          strNewickTree
323          formatNewickTree          formatNewickTree
324    
325            read_newick_tree
326            read_newick_trees
327          parse_newick_tree_str          parse_newick_tree_str
328    
329          printer_plot_newick          printer_plot_newick
# Line 307  Line 364 
364    
365  use gjolists qw(  use gjolists qw(
366          common_prefix          common_prefix
         common_and_unique  
367          unique_suffixes          unique_suffixes
368    
         unique_set  
369          duplicates          duplicates
370          random_order          random_order
371    
         union  
372          intersection          intersection
373          set_difference          set_difference
374          );          );
# Line 332  Line 386 
386    
387    
388  #===============================================================================  #===============================================================================
389    #  Interconvert Overbeek and gjonewick trees:
390    #===============================================================================
391    
392    sub overbeek_to_gjonewick
393    {
394        return () unless ref( $_[0] ) eq 'ARRAY';
395        my ( $lbl, $x, $desc ) = @{ $_[0] };
396        my ( undef, @desc ) = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : ();
397        [ [ map { overbeek_to_gjonewick( $_ ) } @desc ], $lbl, $x ]
398    }
399    
400    sub gjonewick_to_overbeek
401    {
402        return () unless ref( $_[0] ) eq 'ARRAY';
403        my ( $desc, $lbl, $x ) = @{ $_[0] };
404        my @desc = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : ();
405        my $parent = $_[1];
406        my $node = [ $lbl, $x, undef, [] ];
407        $node->[2] = [ $parent, map { gjonewick_to_overbeek( $_, $node ) } @desc ];
408        return $node;
409    }
410    
411    #===============================================================================
412  #  Extract tree structure values:  #  Extract tree structure values:
413  #===============================================================================  #===============================================================================
414  #  #
# Line 352  Line 429 
429  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
430    
431  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]  sub newick_desc_ref { $_[0]->[0] }  # = ${$_[0]}[0]
432  sub newick_lbl      { $_[0]->[1] }  sub newick_lbl      { ref($_[0]) ? $_[0]->[1] : Carp::confess() }
433  sub newick_x        { $_[0]->[2] }  sub newick_x        { $_[0]->[2] }
434  sub newick_c1       { $_[0]->[3] }  sub newick_c1       { $_[0]->[3] }
435  sub newick_c2       { $_[0]->[4] }  sub newick_c2       { $_[0]->[4] }
# Line 427  Line 504 
504  #===============================================================================  #===============================================================================
505  #  Some tree property tests:  #  Some tree property tests:
506  #===============================================================================  #===============================================================================
507    #  Tree is valid?
508    #
509    #  $bool = newick_is_valid( $node, $verbose )
510    #-------------------------------------------------------------------------------
511    sub newick_is_valid
512    {
513        my $node = shift;
514    
515        if ( ! array_ref( $node ) )
516        {
517            print STDERR "Node is not array reference\n" if $_[0];
518            return 0;
519        }
520    
521        my @node = @$node;
522        if ( ! @node )
523        {
524            print STDERR "Node is empty array reference\n" if $_[0];
525            return 0;
526        }
527    
528        # Must have descendant or label:
529    
530        if ( ! ( array_ref( $node[0] ) && @{ $node[0] } ) && ! $node[2] )
531        {
532            print STDERR "Node has neither descendant nor label\n" if $_[0];
533            return 0;
534        }
535    
536        #  If comments are present, they must be array references
537    
538        foreach ( ( @node > 3 ) ? @node[ 3 .. $#node ] : () )
539        {
540            if ( defined( $_ ) && ! array_ref( $_ ) )
541            {
542                print STDERR "Node has neither descendant or label\n" if $_[0];
543                return 0;
544            }
545        }
546    
547        #  Inspect the descendants:
548    
549        foreach ( array_ref( $node[0] ) ? @{ $node[0] } : () )
550        {
551            newick_is_valid( $_, @_ ) || return 0
552        }
553    
554        return 1;
555    }
556    
557    
558    #-------------------------------------------------------------------------------
559  #  Tree is rooted (2 branches at root node)?  #  Tree is rooted (2 branches at root node)?
560  #  #
561  #  $bool = newick_is_rooted( $node )  #  $bool = newick_is_rooted( $node )
# Line 580  Line 709 
709  #  @tips = newick_duplicated_tips( $node )  #  @tips = newick_duplicated_tips( $node )
710  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
711  sub newick_duplicated_tips {  sub newick_duplicated_tips {
712      duplicates( newick_tip_list( $_[0] ) );      gjolists::duplicates( newick_tip_list( $_[0] ) );
713  }  }
714    
715    
# Line 616  Line 745 
745      my ( $Tree1, $Tree2 ) = @_;      my ( $Tree1, $Tree2 ) = @_;
746      my ( @Tips1 ) = newick_tip_list( $Tree1 );      my ( @Tips1 ) = newick_tip_list( $Tree1 );
747      my ( @Tips2 ) = newick_tip_list( $Tree2 );      my ( @Tips2 ) = newick_tip_list( $Tree2 );
748      intersection( \@Tips1, \@Tips2 );      gjolists::intersection( \@Tips1, \@Tips2 );
749  }  }
750    
751    
# Line 712  Line 841 
841    
842    
843  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
844    #  Tree tip insertion point (with standard node labels):
845    #
846    #  [ $node1, $x1, $node2, $x2, $x ]
847    #           = newick_tip_insertion_point( $tree, $tip )
848    #
849    #  Which means: tip is on a branch of length x that is inserted into the branch
850    #  connecting node1 and node2, at distance x1 from node1 and x2 from node2.
851    #
852    #                x1    +------ n1a (lowest sorting tip of this subtree)
853    #            +--------n1
854    #            |         +------n1b (lowest sorting tip of this subtree)
855    #  tip-------n
856    #        x   |       +------------- n2a (lowest sorting tip of this subtree)
857    #            +------n2
858    #               x2   +-------- n2b (lowest sorting tip of this subtree)
859    #
860    #  The designations of 1 vs 2, and a vs b are chosen such that:
861    #     n1a < n1b, and n2a < n2b, and n1a < n2a
862    #
863    #  Then the statandard description becomes:
864    #
865    #  [ [ $n1a, min(n1b,n2a), max(n1b,n2a) ], x1,
866    #    [ $n2a, min(n2b,n1a), max(n2b,n1a) ], x2,
867    #    x
868    #  ]
869    #
870    #-------------------------------------------------------------------------------
871    sub newick_tip_insertion_point
872    {
873        my ( $tree, $tip ) = @_;
874        $tree && $tip && ref( $tree ) eq 'ARRAY'    or return undef;
875        $tree = copy_newick_tree( $tree )           or return undef;
876        $tree = reroot_newick_to_tip( $tree, $tip ) or return undef;
877        my $node = $tree;
878    
879        my $x  = 0;                        # Distance to node
880        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
881        $node  = $dl->[0];                 # Node adjacent to tip
882        $dl    = newick_desc_ref( $node );
883        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
884        {
885            $node = $dl->[0];
886            $x   += newick_x( $node );
887            $dl   = newick_desc_ref( $node );
888        }
889        $x += newick_x( $node );
890    
891        #  We are now at the node that is the insertion point.
892        #  Is it a tip?
893    
894        my @description;
895    
896        if ( ( ! $dl ) || @$dl == 0 )
897        {
898            @description = ( [ newick_lbl( $node ) ], 0, undef, 0, $x );
899        }
900    
901        #  Is it a trifurcation or greater, in which case it does not go
902        #  away with tip deletion?
903    
904        elsif ( @$dl > 2 )
905        {
906            @description = ( [ std_node_name( $node, $node ) ], 0, undef, 0, $x );
907        }
908    
909        #  The node is bifurcating.  We need to describe it.
910    
911        else
912        {
913            my ( $n1, $x1 ) = describe_desc( $dl->[0] );
914            my ( $n2, $x2 ) = describe_desc( $dl->[1] );
915    
916            if ( @$n1 == 2 ) { push @$n1, $n2->[0] }
917            if ( @$n2 == 2 )
918            {
919                @$n2 = sort { lc $a cmp lc $b } ( @$n2, $n1->[0] );
920            }
921            if ( @$n1 == 3 ) { @$n2 = sort { lc $a cmp lc $b } @$n2 }
922            @description = ( $n1, $x1, $n2, $x2, $x );
923        }
924    
925        return wantarray ? @description : \@description;
926    }
927    
928    
929    sub describe_desc
930    {
931        my $node = shift;
932    
933        my $x  = 0;                        # Distance to node
934        my $dl = newick_desc_ref( $node ); # Descendent list of tip node;
935        while ( $dl && ( @$dl == 1 ) )     # Traverse unbranched nodes
936        {
937            $node = $dl->[0];
938            $x   += newick_x( $node );
939            $dl   = newick_desc_ref( $node );
940        }
941        $x += newick_x( $node );
942    
943        #  Is it a tip?  Return list of one tip;
944    
945        if ( ( ! $dl ) || @$dl == 0 )
946        {
947            return ( [ newick_lbl( $node ) ], $x );
948        }
949    
950        #  Get tips of each descendent, keeping lowest sorting from each.
951        #  Return the two lowest of those (the third will come from the
952        #  other side of the original node).
953    
954        else
955        {
956            my @rep_tips = sort { lc $a cmp lc $b }
957                           map  { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] }
958                           @$dl;
959            return ( [ @rep_tips[0,1] ], $x );
960        }
961    }
962    
963    
964    #-------------------------------------------------------------------------------
965  #  Standard node name:  #  Standard node name:
966  #     Tip label if at a tip  #     Tip label if at a tip
967  #     Three sorted tip labels intersecting at node, each being smallest  #     Three sorted tip labels intersecting at node, each being smallest
# Line 734  Line 984 
984      #  Work through lists of tips in descendant subtrees, removing them from      #  Work through lists of tips in descendant subtrees, removing them from
985      #  @rest, and keeping the best tip for each subtree.      #  @rest, and keeping the best tip for each subtree.
986    
987      my @rest = tips_in_newick( $tree );      my @rest = newick_tip_list( $tree );
988      my @best = map {      my @best = map {
989              my @tips = sort { lc $a cmp lc $b } tips_in_newick( $_ );              my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ );
990              @rest = set_difference( \@rest, \@tips );              @rest = gjolists::set_difference( \@rest, \@tips );
991              $tips[0];              $tips[0];
992          } newick_desc_list( $noderef );          } newick_desc_list( $noderef );
993    
# Line 856  Line 1106 
1106      @p2 && @p3 || return ();                             #  Were they found?      @p2 && @p3 || return ();                             #  Were they found?
1107    
1108      # Find the common prefix for each pair of paths      # Find the common prefix for each pair of paths
1109      my @p12 = common_prefix( \@p1, \@p2 );      my @p12 = gjolists::common_prefix( \@p1, \@p2 );
1110      my @p13 = common_prefix( \@p1, \@p3 );      my @p13 = gjolists::common_prefix( \@p1, \@p3 );
1111      my @p23 = common_prefix( \@p2, \@p3 );      my @p23 = gjolists::common_prefix( \@p2, \@p3 );
1112    
1113      # Return the longest common prefix of any two paths      # Return the longest common prefix of any two paths
1114      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :      ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 :
# Line 909  Line 1159 
1159      @p1 && @p2 || return undef;                          # Were they found?      @p1 && @p2 || return undef;                          # Were they found?
1160    
1161      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1162      my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost
1163      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1164      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1165    
# Line 930  Line 1180 
1180    
1181      array_ref( $node ) && defined( $node1 )      array_ref( $node ) && defined( $node1 )
1182                         && defined( $node2 ) || return undef;                         && defined( $node2 ) || return undef;
1183      my @p1 = path_to_node( $node, $node1 );      my @p1 = path_to_node( $node, $node1 ) or return undef;
1184      my @p2 = path_to_node( $node, $node2 );      my @p2 = path_to_node( $node, $node2 ) or return undef;
     @p1 && @p2 || return undef;                          # Were they found?  
1185    
1186      # Find the unique suffixes of the two paths      # Find the unique suffixes of the two paths
1187      my ( $suf1, $suf2 ) = unique_suffixes( \@p1, \@p2 ); # Common node is lost      my ( $suf1, $suf2 ) = gjolists::unique_suffixes( \@p1, \@p2 ); # Common node is lost
1188      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;      my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0;
1189      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;      my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0;
1190    
# Line 1150  Line 1399 
1399      my ( $node, $x, $not_root ) = @_;      my ( $node, $x, $not_root ) = @_;
1400    
1401      my $n = 0;      my $n = 0;
1402      if ( $not_root ) {      if ( $not_root )
1403        {
1404          set_newick_x( $node, $x );          set_newick_x( $node, $x );
1405          $n++;          $n++;
1406      }      }
1407    
1408      foreach ( newick_desc_list( $node ) ) {      foreach ( newick_desc_list( $node ) )
1409        {
1410          $n += newick_set_all_branches( $_, $x, 1 );          $n += newick_set_all_branches( $_, $x, 1 );
1411      }      }
1412    
# Line 1164  Line 1415 
1415    
1416    
1417  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1418    #  Rescale all branch lenghts by factor.
1419    #
1420    #  $node = newick_rescale_branches( $node, $factor )
1421    #-------------------------------------------------------------------------------
1422    sub newick_rescale_branches {
1423        my ( $node, $factor ) = @_;
1424    
1425        my $x = newick_x( $node );
1426        set_newick_x( $node, $factor * $x ) if $x;
1427    
1428        foreach ( newick_desc_list( $node ) )
1429        {
1430            newick_rescale_branches( $_, $factor );
1431        }
1432    
1433        $node;
1434    }
1435    
1436    
1437    #-------------------------------------------------------------------------------
1438  #  Set negative branches to zero.  The original tree is modfied.  #  Set negative branches to zero.  The original tree is modfied.
1439  #  #
1440  #  $n_changed = newick_fix_negative_branches( $tree )  #  $n_changed = newick_fix_negative_branches( $tree )
# Line 1189  Line 1460 
1460    
1461    
1462  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1463    #  Remove comments from a newick tree (e.g., before writing for phylip).
1464    #
1465    #  $node = newick_strip_comments( $node )
1466    #-------------------------------------------------------------------------------
1467    sub newick_strip_comments {
1468        my ( $node ) = @_;
1469    
1470        @$node = @$node[ 0 .. 2 ];
1471        foreach ( newick_desc_list( $node ) ) { newick_strip_comments( $_ ) }
1472        $node;
1473    }
1474    
1475    
1476    #-------------------------------------------------------------------------------
1477  #  Normalize tree order (in place).  #  Normalize tree order (in place).
1478  #  #
1479  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )  #  ( $tree, $label1 ) = normalize_newick_tree( $tree )
# Line 1404  Line 1689 
1689      #  Reorder this subtree:      #  Reorder this subtree:
1690    
1691      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1692      @$dl_ref = random_order( @$dl_ref );      @$dl_ref = gjolists::random_order( @$dl_ref );
1693    
1694      #  Reorder descendants:      #  Reorder descendants:
1695    
# Line 1432  Line 1717 
1717      #      #
1718      #      splice( @$dl1, $path1-1, 1 );      #      splice( @$dl1, $path1-1, 1 );
1719      #      #
1720      #  But this maintains the cyclic order of the nodes:      #  But the following maintains the cyclic order of the nodes:
1721    
1722      my $dl1 = newick_desc_ref( $node1 );      my $dl1 = newick_desc_ref( $node1 );
1723      my $nd1 = @$dl1;      my $nd1 = @$dl1;
# Line 1447  Line 1732 
1732    
1733      my $dl2 = newick_desc_ref( $node2 );      my $dl2 = newick_desc_ref( $node2 );
1734      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }      if ( array_ref( $dl2 ) ) { push @$dl2, $node1 }
1735      else                     { set_newick_desc_list( $node2, [ $node1 ] ) }      else                     { set_newick_desc_list( $node2, $node1 ) }
1736    
1737      #  Move c1 comments from node 1 to node 2:      #  Move c1 comments from node 1 to node 2:
1738    
# Line 1859  Line 2144 
2144      ( $tree, ( $collapse ? @new_desc : () ) );      ( $tree, ( $collapse ? @new_desc : () ) );
2145  }  }
2146    
2147    #-------------------------------------------------------------------------------
2148    #  Add a subtree to a newick tree node:
2149    #
2150    #  $node = newick_insert_at_node( $node, $subtree )
2151    #-------------------------------------------------------------------------------
2152    sub newick_insert_at_node
2153    {
2154        my ( $node, $subtree ) = @_;
2155        array_ref( $node ) && array_ref( $subtree ) or return undef;
2156    
2157        #  We could check validity of trees, but ....
2158    
2159        my $dl = newick_desc_ref( $node );
2160        if ( array_ref( $dl ) )
2161        {
2162            push @$dl, $subtree;
2163        }
2164        else
2165        {
2166            set_newick_desc_ref( $node, [ $subtree ] );
2167        }
2168        return $node;
2169    }
2170    
2171    
2172    #-------------------------------------------------------------------------------
2173    #  Insert a subtree into a newick tree along the path between 2 nodes:
2174    #
2175    #  $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction )
2176    #-------------------------------------------------------------------------------
2177    sub newick_insert_between_nodes
2178    {
2179        my ( $tree, $subtree, $node1, $node2, $fraction ) = @_;
2180        array_ref( $tree ) && array_ref( $subtree ) or return undef;
2181        $fraction >= 0 && $fraction <= 1 or return undef;
2182    
2183        #  Find the paths to the nodes:
2184    
2185        my @path1 = path_to_node( $tree, $node1 ) or return undef;
2186        my @path2 = path_to_node( $tree, $node2 ) or return undef;
2187    
2188        #  Trim the common prefix:
2189    
2190        while ( $path1[1] == $path2[1] )
2191        {
2192            splice( @path1, 0, 2 );
2193            splice( @path2, 0, 2 );
2194        }
2195    
2196        my ( @path, $dist );
2197        if    ( @path1 < 3 )
2198        {
2199            @path2 >= 3 or return undef;              # node1 = node2
2200            $dist = $fraction* newick_path_length( @path2 );
2201            @path = @path2;
2202        }
2203        elsif ( @path2 < 3 )
2204        {
2205            $dist = ( 1 - $fraction ) * newick_path_length( @path1 );
2206            @path = @path1;
2207        }
2208        else
2209        {
2210            my $dist1 = newick_path_length( @path1 );
2211            my $dist2 = newick_path_length( @path2 );
2212            $dist = $fraction * ( $dist1 + $dist2 ) - $dist1;
2213            @path = ( $dist <= 0 ) ? @path1 : @path2;
2214            $dist = abs( $dist );
2215        }
2216    
2217        #  Descend tree until we reach the insertion branch:
2218    
2219        my $x;
2220        while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) )
2221        {
2222            $dist -= $x;
2223            splice( @path, 0, 2 );
2224        }
2225    
2226        #  Insert the new node:
2227    
2228        set_newick_desc_i( $path[0], $path[1], [ [ $path[2], $subtree ], undef, $dist ] );
2229        set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) );
2230    
2231        return $tree;
2232    }
2233    
2234    
2235  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2236  #  Prune one or more tips from a tree:  #  Prune one or more tips from a tree:
# Line 2019  Line 2391 
2391  #  Tree writing and reading  #  Tree writing and reading
2392  #  #
2393  #===============================================================================  #===============================================================================
2394  #  writeNewickTree( $tree [, $file ] )  #  writeNewickTree( $tree )
2395    #  writeNewickTree( $tree, $file )
2396    #  writeNewickTree( $tree, \*FH )
2397  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2398  sub writeNewickTree {  sub writeNewickTree {
2399      my ( $tree, $file ) = @_;      my ( $tree, $file ) = @_;
2400      $file || ( $file = \*STDOUT );      my ( $fh, $close ) = open_output( $file );
2401      print  $file  ( strNewickTree( $tree ), "\n" );      $fh or return;
2402        print  $fh  ( strNewickTree( $tree ), "\n" );
2403        close $fh if $close;
2404  }  }
2405    
2406    
# Line 2167  Line 2543 
2543    
2544    
2545  #===============================================================================  #===============================================================================
2546    #  $tree  = read_newick_tree( $file )  # reads to a semicolon
2547    #  @trees = read_newick_trees( $file ) # reads to end of file
2548    #===============================================================================
2549    
2550    sub read_newick_tree
2551    {
2552        my $file = shift;
2553        my ( $fh, $close ) = open_input( $file );
2554        my $tree;
2555        my @lines = ();
2556        while ( defined( $_ = <$fh> ) )
2557        {
2558            chomp;
2559            push @lines, $_;
2560            if ( /;/ )
2561            {
2562                $tree = parse_newick_tree_str( join( ' ', @lines ) );
2563                last;
2564            }
2565        }
2566        close $fh if $close;
2567    
2568        $tree;
2569    }
2570    
2571    
2572    sub read_newick_trees
2573    {
2574        my $file = shift;
2575        my ( $fh, $close ) = open_input( $file );
2576        my @trees = ();
2577        my @lines = ();
2578        while ( defined( $_ = <$fh> ) )
2579        {
2580            chomp;
2581            push @lines, $_;
2582            if ( /;/ )
2583            {
2584                push @trees, parse_newick_tree_str( join( ' ', @lines ) );
2585                @lines = ()
2586            }
2587        }
2588        close $fh if $close;
2589    
2590        @trees;
2591    }
2592    
2593    
2594    #===============================================================================
2595  #  Tree reader adapted from the C language reader in fastDNAml  #  Tree reader adapted from the C language reader in fastDNAml
2596  #  #
2597  #  $tree = parse_newick_tree_str( $string )  #  $tree = parse_newick_tree_str( $string )
# Line 2378  Line 2803 
2803  sub printer_plot_newick {  sub printer_plot_newick {
2804      my ( $node, $file, $width, $min_dx, $dy ) = @_;      my ( $node, $file, $width, $min_dx, $dy ) = @_;
2805    
2806      my ( $fh, $close );      my ( $fh, $close ) = open_output( $file );
2807      if ( ! defined( $file ) ) {      $fh or return;
         $fh = \*STDOUT;  
     }  
     elsif ( $file =~ /^\*/ ) {  
         $fh = $file;  
     }  
     else {  
         open $fh, ">$file" or die "Could not open $file for writing printer_plot_newick\n";  
         $close = 1;  
     }  
2808    
2809      print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";      print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n";
2810      if ( $close ) { close $fh }      if ( $close ) { close $fh }
# Line 2555  Line 2971 
2971  }  }
2972    
2973    
2974    #===============================================================================
2975    #  Open an input file stream:
2976    #
2977    #     ( $handle, undef ) = open_input(       );  # \*STDIN
2978    #     ( $handle, undef ) = open_input( \*FH  );
2979    #     ( $handle, 1     ) = open_input( $file );  # need to close $handle
2980    #
2981    #===============================================================================
2982    sub open_input
2983    {
2984        my $file = shift;
2985        my $fh;
2986        if    ( ! defined( $file ) )     { return ( \*STDIN ) }
2987        elsif ( ref( $file ) eq 'GLOB' ) { return ( $file   ) }
2988        elsif ( open( $fh, "<$file" ) )  { return ( $fh, 1  ) } # Need to close
2989    
2990        print STDERR "gjonewick::open_input could not open '$file' for reading\n";
2991        return undef;
2992    }
2993    
2994    
2995    #===============================================================================
2996    #  Open an output file stream:
2997    #
2998    #     ( $handle, undef ) = open_output(      );  # \*STDOUT
2999    #     ( $handle, undef ) = open_output( \*FH );
3000    #     ( $handle, 1     ) = open_output( $file ); # need to close $handle
3001    #
3002    #===============================================================================
3003    sub open_output
3004    {
3005        my $file = shift;
3006        my $fh;
3007        if    ( ! defined( $file ) )     { return ( \*STDOUT ) }
3008        elsif ( ref( $file ) eq 'GLOB' ) { return ( $file    ) }
3009        elsif ( ( open $fh, ">$file" ) ) { return ( $fh, 1   ) } # Need to close
3010    
3011        print STDERR "gjonewick::open_output could not open '$file' for writing\n";
3012        return undef;
3013    }
3014    
3015  1;  1;

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.8

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3