[Bio] / FigKernelPackages / gjonewicklib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjonewicklib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.15, Sun Sep 6 22:38:32 2009 UTC revision 1.22, Fri Aug 27 23:34:33 2010 UTC
# Line 1  Line 1 
1    # This is a SAS component.
2    
3  #  #
4  # Copyright (c) 2003-2007 University of Chicago and Fellowship  # Copyright (c) 2003-2010 University of Chicago and Fellowship
5  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
6  #  #
7  # This file is part of the SEED Toolkit.  # This file is part of the SEED Toolkit.
# Line 145  Line 147 
147  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )  #  ( $tipref,  $xmax ) = newick_most_distant_tip_ref( $noderef )
148  #  ( $tipname, $xmax ) = newick_most_distant_tip_name( $noderef )  #  ( $tipname, $xmax ) = newick_most_distant_tip_name( $noderef )
149  #  #
150    #  Provide a standard name by which two trees can be compared for same topology
151    #
152    #  $stdname = std_tree_name( $tree )
153    #
154  #  Tree tip insertion point (tip is on branch of length x that  #  Tree tip insertion point (tip is on branch of length x that
155  #  is inserted into branch connecting node1 and node2, a distance  #  is inserted into branch connecting node1 and node2, a distance
156  #  x1 from node1 and x2 from node2):  #  x1 from node1 and x2 from node2):
# Line 266  Line 272 
272  # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )  # \@tips    = tip_neighborhood_representative_tips( $tree, $tip, $n )
273  #  #
274  #===============================================================================  #===============================================================================
275    #  Random trees
276    #===============================================================================
277    #
278    #   $tree = random_equibranch_tree(  @tips, \%options )
279    #   $tree = random_equibranch_tree( \@tips, \%options )
280    #   $tree = random_equibranch_tree(  @tips )
281    #   $tree = random_equibranch_tree( \@tips )
282    #
283    #   $tree = random_ultrametric_tree(  @tips, \%options )
284    #   $tree = random_ultrametric_tree( \@tips, \%options )
285    #   $tree = random_ultrametric_tree(  @tips )
286    #   $tree = random_ultrametric_tree( \@tips )
287    #
288    #===============================================================================
289  #  Tree reading and writing:  #  Tree reading and writing:
290  #===============================================================================  #===============================================================================
291  #  Write machine-readable trees:  #  Write machine-readable trees:
# Line 325  Line 345 
345    
346          newick_tip_insertion_point          newick_tip_insertion_point
347    
348          std_newick_name          std_tree_name
349    
350          path_to_tip          path_to_tip
351          path_to_named_node          path_to_named_node
# Line 385  Line 405 
405          tip_neighborhood_representative_tree          tip_neighborhood_representative_tree
406          tip_neighborhood_representative_tips          tip_neighborhood_representative_tips
407    
408            random_equibranch_tree
409            random_ultrametric_tree
410    
411          writeNewickTree          writeNewickTree
412          fwriteNewickTree          fwriteNewickTree
413          strNewickTree          strNewickTree
# Line 434  Line 457 
457  #  Internally used definitions  #  Internally used definitions
458  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
459    
460  sub array_ref { ref( $_[0] ) eq "ARRAY" }  sub array_ref { $_[0] && ref( $_[0] ) eq 'ARRAY' }
461  sub hash_ref  { ref( $_[0] ) eq "HASH"  }  sub hash_ref  { $_[0] && ref( $_[0] ) eq 'HASH'  }
462    
463    
464  #===============================================================================  #===============================================================================
# Line 497  Line 520 
520    
521  sub newick_desc_list {  sub newick_desc_list {
522      my $node = $_[0];      my $node = $_[0];
523      ! array_ref( $node      ) ? undef           :      array_ref( $node ) && array_ref( $node->[0] ) ? @{ $node->[0] } : ();
       array_ref( $node->[0] ) ? @{ $node->[0] } :  
                                 ()              ;  
524  }  }
525    
526  sub newick_n_desc {  sub newick_n_desc {
527      my $node = $_[0];      my $node = $_[0];
528      ! array_ref( $node      ) ? undef                  :      array_ref( $node ) && array_ref( $node->[0] ) ? scalar @{ $node->[0] } : 0;
       array_ref( $node->[0] ) ? scalar @{ $node->[0] } :  
                                 0                      ;  
529  }  }
530    
531  sub newick_desc_i {  sub newick_desc_i {
532      my ( $node, $i ) = @_;      my ( $node, $i ) = @_;
533      ! array_ref( $node      ) ? undef              :      array_ref( $node ) && $i && array_ref( $node->[0] ) ? $node->[0]->[$i-1] : undef;
       array_ref( $node->[0] ) ? $node->[0]->[$i-1] :  
                                 undef              ;  
534  }  }
535    
536  sub node_is_tip {  sub node_is_tip {
# Line 1651  Line 1668 
1668    
1669    
1670  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1671    #  Standard name for a Newick tree topology
1672    #
1673    #    $stdname = std_tree_name( $tree )
1674    #
1675    #-------------------------------------------------------------------------------
1676    sub std_tree_name
1677    {
1678        my ( $tree ) = @_;
1679        my ( $mintip ) = sort { lc $a cmp lc $b } newick_tip_list( $tree );
1680        ( std_tree_name_2( reroot_newick_next_to_tip( copy_newick_tree( $tree ), $mintip ) ) )[0];
1681    }
1682    
1683    
1684    #
1685    #  ( $name, $mintip ) = std_tree_name_2( $node )
1686    #
1687    sub std_tree_name_2
1688    {
1689        my ( $node ) = @_;
1690    
1691        my @descends = newick_desc_list( $node );
1692        if ( @descends == 0 )
1693        {
1694            my $lbl = newick_lbl( $node );
1695            return ( $lbl, $lbl );
1696        }
1697    
1698        my @list = sort { lc $a->[1] cmp lc $b->[1] || $a->[1] cmp $b->[1] }
1699                   map  { [ std_tree_name_2( $_ ) ] }
1700                   @descends;
1701        my $mintip = $list[0]->[1];
1702        my $name   = '(' . join( "\t", map { $_->[0] } @list ) . ')';
1703    
1704        return ( $name, $mintip );
1705    }
1706    
1707    
1708    #-------------------------------------------------------------------------------
1709  #  Move largest groups to periphery of tree (in place).  #  Move largest groups to periphery of tree (in place).
1710  #  #
1711  #      dir  <= -2 for up-sweeping tree (big groups always first),  #      dir  <= -2 for up-sweeping tree (big groups always first),
# Line 1710  Line 1765 
1765      my $nd = newick_n_desc( $node );      my $nd = newick_n_desc( $node );
1766      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip      if ( $nd <  1 ) { return $node }       #  Do nothing to a tip
1767    
     #  Reorder this subtree:  
   
1768      my $dl_ref = newick_desc_ref( $node );      my $dl_ref = newick_desc_ref( $node );
1769      if    ( $dir < 0 ) {                   #  Big group first  
1770          @$dl_ref = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;      #  Reorder this subtree (biggest subtrees to outside)
1771    
1772        if ( $dir )
1773        {
1774            #  Big group first
1775            my @dl = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref;
1776    
1777            my ( @dl1, @dl2 );
1778            for ( my $i = 0; $i < $nd; $i++ ) {
1779                if ( $i & 1 ) { push @dl2, $dl[$i] } else { push @dl1, $dl[$i] }
1780      }      }
1781      elsif ( $dir > 0 ) {                   #  Small group first  
1782          @$dl_ref = sort { $cntref->{$a} <=> $cntref->{$b} } @$dl_ref;          @$dl_ref = ( $dir < 0 ) ? ( @dl1, reverse @dl2 )
1783                                    : ( @dl2, reverse @dl1 );
1784      }      }
1785    
1786      #  Reorder within descendant subtrees:      #  Reorder within descendant subtrees:
# Line 2169  Line 2232 
2232  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2233  sub reroot_newick_to_approx_midpoint_w {  sub reroot_newick_to_approx_midpoint_w {
2234      my ( $tree ) = @_;      my ( $tree ) = @_;
2235        array_ref( $tree ) or return undef;
2236    
2237      #  Compile average tip to node distances assending from tips      #  Compile average tip to node distances assending from tips
2238    
# Line 2193  Line 2257 
2257  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
2258  sub reroot_newick_to_midpoint_w {  sub reroot_newick_to_midpoint_w {
2259      my ( $tree ) = @_;      my ( $tree ) = @_;
2260        array_ref( $tree ) or return ();
2261    
2262      #  Compile average tip to node distances assending      #  Compile average tip to node distances assending
2263    
# Line 2208  Line 2273 
2273  }  }
2274    
2275    
2276    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2277  sub average_to_tips_1_w {  sub average_to_tips_1_w {
2278      my ( $node ) = @_;      my ( $node ) = @_;
2279    
# Line 2233  Line 2299 
2299  }  }
2300    
2301    
2302    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2303  sub average_to_tips_2_w {  sub average_to_tips_2_w {
2304      my ( $dists1, $x_above, $n_above, $anc_node ) = @_;      my ( $dists1, $x_above, $n_above, $anc_node ) = @_;
2305      my ( undef, $n_below, $x, $x_below, $desc_list, $node ) = @$dists1;      my ( undef, $n_below, $x, $x_below, $desc_list, $node ) = @$dists1;
# Line 2933  Line 3000 
3000    
3001    
3002  #===============================================================================  #===============================================================================
3003    #  Random trees
3004    #===============================================================================
3005    #
3006    #   $tree = random_equibranch_tree(  @tips, \%options )
3007    #   $tree = random_equibranch_tree( \@tips, \%options )
3008    #   $tree = random_equibranch_tree(  @tips )
3009    #   $tree = random_equibranch_tree( \@tips )
3010    #
3011    #  Options:
3012    #
3013    #     length => $branch_length   # D = 1
3014    #
3015    #-------------------------------------------------------------------------------
3016    sub random_equibranch_tree
3017    {
3018        my $opts = $_[ 0] && ref $_[ 0] eq 'HASH' ? shift
3019                 : $_[-1] && ref $_[-1] eq 'HASH' ? pop
3020                 :                                  {};
3021        return undef if ! defined $_[0];
3022    
3023        my @tips = ref $_[0] ? @{ $_[0] } : @_;
3024        return undef if @tips < 2;
3025    
3026        my $len = $opts->{ length } ||= 1;
3027    
3028        if ( @tips == 2 )
3029        {
3030            return [ [ map { [ [], $_, $len ] } @tips ], undef, 0 ];
3031        }
3032    
3033        my $tree = [ [ ], undef, 0 ];
3034    
3035        my @links;  # \$anc_dl[i], i.e. a reference to an element in a descendent list
3036    
3037        my $anc_dl = $tree->[0];
3038        foreach my $tip ( splice( @tips, 0, 3 ) )
3039        {
3040            my $node = [ [], $tip, $len ];
3041            push @$anc_dl, $node;
3042            push @links, \$anc_dl->[-1];  #  Ref to the just added descendent list entry
3043        }
3044    
3045        foreach my $tip ( @tips )
3046        {
3047            my $link    = $links[ int( rand( scalar @links ) ) ];
3048            my $newtip  = [ [], $tip, $len ];
3049            my $new_dl  = [ $$link, $newtip ];
3050            my $newnode = [ $new_dl, undef, $len ];
3051            $$link = $newnode;
3052            push @links, \$new_dl->[0], \$new_dl->[1]
3053        }
3054    
3055        return $tree;
3056    }
3057    
3058    
3059    #-------------------------------------------------------------------------------
3060    #
3061    #   $tree = random_ultrametric_tree(  @tips, \%options )
3062    #   $tree = random_ultrametric_tree( \@tips, \%options )
3063    #   $tree = random_ultrametric_tree(  @tips )
3064    #   $tree = random_ultrametric_tree( \@tips )
3065    #
3066    #  Options:
3067    #
3068    #     depth => $root_to_tip_dist   # D = 1
3069    #
3070    #-------------------------------------------------------------------------------
3071    sub random_ultrametric_tree
3072    {
3073        my $opts = $_[ 0] && ref $_[ 0] eq 'HASH' ? shift
3074                 : $_[-1] && ref $_[-1] eq 'HASH' ? pop
3075                 :                                  {};
3076        return undef if ! defined $_[0];
3077    
3078        my @tips = ref $_[0] ? @{ $_[0] } : @_;
3079        return undef if @tips < 2;
3080    
3081        my $d2tip = $opts->{ depth } ||= 1;
3082    
3083        #  Random tip addition order (for rooted tree it matters):
3084    
3085        @tips = sort { rand() <=> 0.5 } @tips;
3086        my $tree = [ [ ], undef, 0 ];
3087    
3088        my $subtree_size = { $tree => 0 };  # total branch length of each subtree
3089    
3090        #  We start with root bifurcation:
3091    
3092        foreach my $tip ( splice( @tips, 0, 2 ) )
3093        {
3094            my $node = [ [], $tip, $d2tip ];
3095            push @{ $tree->[0] }, $node;
3096            $subtree_size->{ $node }  = $d2tip;
3097            $subtree_size->{ $tree } += $d2tip;
3098        }
3099    
3100        #  Add each remaining tip at $pos, measured along the contour length
3101        #  of the tree (with no retracing along branches).
3102    
3103        foreach my $tip ( @tips )
3104        {
3105            my $pos = rand( $subtree_size->{ $tree } );
3106            random_add_to_ultrametric_tree( $tree, $tip, $subtree_size, $pos, $d2tip );
3107        }
3108    
3109        return $tree;
3110    }
3111    
3112    
3113    sub random_add_to_ultrametric_tree
3114    {
3115        my ( $node, $tip, $subtree_size, $pos, $d2tip ) = @_;
3116        $node && $node->[0] && ref $node->[0] eq 'ARRAY' or die "Bad tree node passed to random_add_to_ultrametric_tree().\n";
3117    
3118        # Find the descendent line that it goes in:
3119    
3120        my $i;
3121        my $dl = $node->[0];
3122        my $size0 = $subtree_size->{ $dl->[0] };
3123        if ( $size0 > $pos ) { $i = 0 } else { $i = 1; $pos -= $size0 }
3124        my $desc = $dl->[$i];
3125    
3126        # Does it go within the subtree, or the branch to the subtree?
3127    
3128        my $len;
3129        my $added;
3130        if ( ( $len = $desc->[2] ) <= $pos )
3131        {
3132            $added = random_add_to_ultrametric_tree( $desc, $tip, $subtree_size, $pos - $len, $d2tip - $len );
3133        }
3134        else
3135        {
3136            # If not in subtree, then it goes in the branch to the descendent node
3137            #
3138            #     ----- node  ------------       node
3139            #       ^   /  \       ^             /  \
3140            #       |       \      | pos             \l1
3141            #       |        \     v                  \
3142            #       |      len\ ----------         newnode
3143            #       |          \                     /  \ l2
3144            # d2tip |           \                   /    \
3145            #       |           desc               /     desc
3146            #       |           /  \            l3/      /  \
3147            #       |          .    .            /      .    .
3148            #       v         .      .          /      .      .
3149            #     -----      .        .     newtip    .        .
3150    
3151            my $l1      = $pos;
3152            my $l2      = $len   - $pos;
3153            my $l3      = $d2tip - $pos;
3154            my $newtip  = [ [], $tip, $l3 ];
3155            my $newnode = [ [ $desc, $newtip ], undef, $l1 ];
3156            $dl->[$i]   = $newnode;
3157            $subtree_size->{ $newtip  } = $l3;
3158            $subtree_size->{ $newnode } = $subtree_size->{ $desc } + $l3;
3159            $desc->[2] = $l2;
3160            $subtree_size->{ $desc } -= $l1;
3161            $added = $l3;
3162        }
3163    
3164        #  New branch was inserted below this point:
3165    
3166        $subtree_size->{ $node } += $added;
3167        return $added;
3168    }
3169    
3170    
3171    
3172    #===============================================================================
3173  #  #
3174  #  Tree writing and reading  #  Tree writing and reading
3175  #  #
# Line 3578  Line 3815 
3815      my @lines;      my @lines;
3816      foreach ( ( $y1 .. $y2 ) )      foreach ( ( $y1 .. $y2 ) )
3817      {      {
3818          my $line = text_tree_row( $node, $hash, $_, [], 'tee_l' );          my $line = text_tree_row( $node, $hash, $_, [], 'tee_l', $dy >= 2 );
3819          my $lbl  = '';          my $lbl  = '';
3820          if ( @$line )          if ( @$line )
3821          {          {
# Line 3675  Line 3912 
3912          $yn1 = $ylist[ 0];          $yn1 = $ylist[ 0];
3913          $yn2 = $ylist[-1];          $yn2 = $ylist[-1];
3914          $y = int( 0.5 * ( $yn1 + $yn2 ) + ( $yrnd || 0.4999 ) );          $y = int( 0.5 * ( $yn1 + $yn2 ) + ( $yrnd || 0.4999 ) );
3915    
3916            #  Handle special case of internal node label. Put it between subtrees.
3917    
3918            if ( ( $dy >= 2 ) && newick_lbl( $node ) && ( @dl > 1 ) ) {
3919                #  Find the descendents $i1 and $i2 to put the branch between
3920                my $i2 = 1;
3921                while ( ( $i2+1 < @ylist ) && ( $ylist[$i2] < $y ) ) { $i2++ }
3922                my $i1 = $i2 - 1;
3923                #  Get bottom of subtree1 and top of subtree2:
3924                my $ymax1 = $hash->{ $dl[ $i1 ] }->[ 1 ];
3925                my $ymin2 = $hash->{ $dl[ $i2 ] }->[ 0 ];
3926                #  Midway between bottom of subtree1 and top of subtree2, with
3927                #  preferred rounding direction
3928                $y = int( 0.5 * ( $ymax1 + $ymin2 ) + ( $yrnd || 0.4999 ) );
3929            }
3930      }      }
3931    
3932      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );      $y2 = int( $ymax - 0.5 * $dy + 0.4999 );
# Line 3707  Line 3959 
3959  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3960  #  Produce a description of one line of a printer plot tree.  #  Produce a description of one line of a printer plot tree.
3961  #  #
3962  #  \@line = text_tree_row( $node, $hash, $row, \@line, $symb )  #  \@line = text_tree_row( $node, $hash, $row, \@line, $symb, $ilbl )
3963  #  #
3964  #     \@line is the character descriptions accumulated so far, one per array  #     \@line is the character descriptions accumulated so far, one per array
3965  #          element, except for a label, which can be any number of characters.  #          element, except for a label, which can be any number of characters.
# Line 3719  Line 3971 
3971  #     \%hash contains tree layout information  #     \%hash contains tree layout information
3972  #      $row  is the row number (y value) that we are building  #      $row  is the row number (y value) that we are building
3973  #      $symb is the plot symbol proposed for the current x and y position  #      $symb is the plot symbol proposed for the current x and y position
3974    #      $ilbl is true if internal node labels are allowed
3975  #  #
3976  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3977  sub text_tree_row  sub text_tree_row
3978  {  {
3979      my ( $node, $hash, $row, $line, $symb ) = @_;      my ( $node, $hash, $row, $line, $symb, $ilbl ) = @_;
3980    
3981      my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };      my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } };
3982      if ( $row < $y1 || $row > $y2 ) { return $line }      if ( $row < $y1 || $row > $y2 ) { return $line }
# Line 3757  Line 4010 
4010          foreach ( @list ) {          foreach ( @list ) {
4011              my ( $n, $s ) = @$_;              my ( $n, $s ) = @$_;
4012              if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {              if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) {
4013                  $line = text_tree_row( $n, $hash, $row, $line, $s );                  $line = text_tree_row( $n, $hash, $row, $line, $s, $ilbl );
4014              }              }
4015           }           }
4016    
4017          if ( $row == $y ) {          if ( $row == $y ) {
4018              $line->[$x] = ( $line->[$x] eq 'horiz' ) ? 'tee_l'              $line->[$x] = ( $line->[$x] eq 'horiz' ) ? 'tee_l'
4019                                                       : $with_left_line{ $line->[$x] };                                                       : $with_left_line{ $line->[$x] };
4020                push( @$line, newick_lbl( $node ), '' ) if $ilbl && newick_lbl( $node );
4021          }          }
4022      }      }
4023    
# Line 3808  Line 4062 
4062  {  {
4063      my $file = shift;      my $file = shift;
4064      my $fh;      my $fh;
4065      if    ( ! defined( $file ) )     { return ( \*STDIN ) }      if    ( ! defined $file || $file eq '' ) { return ( \*STDIN ) }
4066      elsif ( ref( $file ) eq 'GLOB' ) { return ( $file   ) }      elsif ( ref( $file ) eq 'GLOB' ) { return ( $file   ) }
4067      elsif ( open( $fh, "<$file" ) )  { return ( $fh, 1  ) } # Need to close      elsif ( open( $fh, "<$file" ) )  { return ( $fh, 1  ) } # Need to close
4068    
# Line 3829  Line 4083 
4083  {  {
4084      my $file = shift;      my $file = shift;
4085      my $fh;      my $fh;
4086      if    ( ! defined( $file ) )     { return ( \*STDOUT ) }      if    ( ! defined $file || $file eq '' ) { return ( \*STDOUT ) }
4087      elsif ( ref( $file ) eq 'GLOB' ) { return ( $file    ) }      elsif ( ref( $file ) eq 'GLOB' ) { return ( $file    ) }
4088      elsif ( ( open $fh, ">$file" ) ) { return ( $fh, 1   ) } # Need to close      elsif ( ( open $fh, ">$file" ) ) { return ( $fh, 1   ) } # Need to close
4089    

Legend:
Removed from v.1.15  
changed lines
  Added in v.1.22

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3