Parent Directory
|
Revision Log
Revision 1.9 - (view) (download) (as text)
1 : | olson | 1.2 | # |
2 : | golsen | 1.8 | # Copyright (c) 2003-2007 University of Chicago and Fellowship |
3 : | olson | 1.2 | # for Interpretations of Genomes. All Rights Reserved. |
4 : | # | ||
5 : | # This file is part of the SEED Toolkit. | ||
6 : | golsen | 1.9 | # |
7 : | olson | 1.2 | # The SEED Toolkit is free software. You can redistribute |
8 : | # it and/or modify it under the terms of the SEED Toolkit | ||
9 : | golsen | 1.9 | # Public License. |
10 : | olson | 1.2 | # |
11 : | # You should have received a copy of the SEED Toolkit Public License | ||
12 : | # along with this program; if not write to the University of Chicago | ||
13 : | # at info@ci.uchicago.edu or the Fellowship for Interpretation of | ||
14 : | # Genomes at veronika@thefig.info or download a copy from | ||
15 : | # http://www.theseed.org/LICENSE.TXT. | ||
16 : | # | ||
17 : | |||
18 : | golsen | 1.1 | package gjonewicklib; |
19 : | |||
20 : | #=============================================================================== | ||
21 : | # perl functions for dealing with trees | ||
22 : | # | ||
23 : | # Usage: | ||
24 : | # use gjonewicklib | ||
25 : | # | ||
26 : | # | ||
27 : | #=============================================================================== | ||
28 : | # Tree data structures: | ||
29 : | #=============================================================================== | ||
30 : | # | ||
31 : | # Elements in newick text file are: | ||
32 : | # | ||
33 : | # [c1] ( desc_1, desc_2, ... ) [c2] label [c3] : [c4] x [c5] | ||
34 : | # | ||
35 : | # Note that: | ||
36 : | golsen | 1.9 | # |
37 : | golsen | 1.1 | # Comment list 1 can exist on any subtree, but its association with |
38 : | # tree components can be upset by rerooting | ||
39 : | # Comment list 2 cannot exist without a descendant list | ||
40 : | # Comment list 3 cannot exist without a label | ||
41 : | # Comment lists 4 and 5 cannot exist without a branch length | ||
42 : | # | ||
43 : | # Elements in perl representation are: | ||
44 : | # | ||
45 : | # $tree = \@rootnode; | ||
46 : | # | ||
47 : | overbeek | 1.7 | # $node = [ \@desc, # reference to list of descendants |
48 : | golsen | 1.1 | # $label, # node label |
49 : | # $x, # branch length | ||
50 : | # \@c1, # reference to comment list 1 | ||
51 : | # \@c2, # reference to comment list 2 | ||
52 : | # \@c3, # reference to comment list 3 | ||
53 : | # \@c4, # reference to comment list 4 | ||
54 : | # \@c5 # reference to comment list 5 | ||
55 : | overbeek | 1.7 | # ] |
56 : | golsen | 1.1 | # |
57 : | # At present, no routine tests or enforces the length of the list (a single | ||
58 : | # element list could be a valid internal node). | ||
59 : | # | ||
60 : | # All empty lists can be [] or undef | ||
61 : | # | ||
62 : | # Putting the comments at the end allows a shorter list nearly all the | ||
63 : | # time, but is different from the prolog representation. | ||
64 : | # | ||
65 : | # | ||
66 : | overbeek | 1.7 | # Ross Overbeek has a different tree node structure: |
67 : | # | ||
68 : | # $node = [ Label, | ||
69 : | # DistanceToParent, | ||
70 : | # [ ParentPointer, ChildPointer1, ... ], | ||
71 : | # [ Name1\tVal1, Name2\tVal2, ... ] | ||
72 : | # ] | ||
73 : | # | ||
74 : | # So: | ||
75 : | # | ||
76 : | #=============================================================================== | ||
77 : | # Tree format interconversion: | ||
78 : | #=============================================================================== | ||
79 : | # | ||
80 : | golsen | 1.9 | # $bool = is_overbeek_tree( $tree ) |
81 : | # $bool = is_gjonewick_tree( $tree ) | ||
82 : | # | ||
83 : | overbeek | 1.7 | # $gjonewick = overbeek_to_gjonewick( $overbeek ) |
84 : | # $overbeek = gjonewick_to_overbeek( $gjonewick ) | ||
85 : | # | ||
86 : | golsen | 1.1 | #=============================================================================== |
87 : | # Tree data extraction: | ||
88 : | #=============================================================================== | ||
89 : | # | ||
90 : | # $listref = newick_desc_ref( $noderef ) | ||
91 : | # $label = newick_lbl( $noderef ) | ||
92 : | # $x = newick_x( $noderef ) | ||
93 : | # $listref = newick_c1( $noderef ) | ||
94 : | # $listref = newick_c2( $noderef ) | ||
95 : | # $listref = newick_c3( $noderef ) | ||
96 : | # $listref = newick_c4( $noderef ) | ||
97 : | # $listref = newick_c5( $noderef ) | ||
98 : | # | ||
99 : | # @desclist = newick_desc_list( $noderef ) | ||
100 : | # $n = newick_n_desc( $noderef ) | ||
101 : | # $descref = newick_desc_i( $noderef, $i ) # 1-based numbering | ||
102 : | # $bool = newick_is_tip( $noderef ) | ||
103 : | # | ||
104 : | # set_newick_desc_ref( $noderef, $listref ) | ||
105 : | # set_newick_lbl( $noderef, $label ) | ||
106 : | # set_newick_x( $noderef, $x ) | ||
107 : | # set_newick_c1( $noderef, $listref ) | ||
108 : | # set_newick_c2( $noderef, $listref ) | ||
109 : | # set_newick_c3( $noderef, $listref ) | ||
110 : | # set_newick_c4( $noderef, $listref ) | ||
111 : | # set_newick_c5( $noderef, $listref ) | ||
112 : | # set_newick_desc_list( $noderef, @desclist ) | ||
113 : | golsen | 1.9 | # set_newick_desc_i( $noderef1, $i, $noderef2 ) # 1-based numbering |
114 : | golsen | 1.8 | # |
115 : | # $bool = newick_is_valid( $noderef ) # verify that tree is valid | ||
116 : | golsen | 1.1 | # |
117 : | # $bool = newick_is_rooted( $noderef ) # 2 branches from root | ||
118 : | # $bool = newick_is_unrooted( $noderef ) # 3 or more branches from root | ||
119 : | # $bool = newick_is_tip_rooted( $noderef ) # 1 branch from root | ||
120 : | # $bool = newick_is_bifurcating( $noderef ) | ||
121 : | # | ||
122 : | # $n = newick_tip_count( $noderef ) | ||
123 : | # @tiprefs = newick_tip_ref_list( $noderef ) | ||
124 : | golsen | 1.9 | # \@tiprefs = newick_tip_ref_list( $noderef ) |
125 : | golsen | 1.1 | # @tips = newick_tip_list( $noderef ) |
126 : | golsen | 1.9 | # \@tips = newick_tip_list( $noderef ) |
127 : | # | ||
128 : | golsen | 1.1 | # $tipref = newick_first_tip_ref( $noderef ) |
129 : | # $tip = newick_first_tip( $noderef ) | ||
130 : | golsen | 1.9 | # |
131 : | golsen | 1.1 | # @tips = newick_duplicated_tips( $noderef ) |
132 : | golsen | 1.9 | # \@tips = newick_duplicated_tips( $noderef ) |
133 : | # | ||
134 : | golsen | 1.1 | # $bool = newick_tip_in_tree( $noderef, $tipname ) |
135 : | golsen | 1.9 | # |
136 : | golsen | 1.1 | # @tips = newick_shared_tips( $tree1, $tree2 ) |
137 : | golsen | 1.9 | # \@tips = newick_shared_tips( $tree1, $tree2 ) |
138 : | golsen | 1.1 | # |
139 : | # $length = newick_tree_length( $noderef ) | ||
140 : | golsen | 1.9 | # |
141 : | # %tip_distances = newick_tip_distances( $noderef ) | ||
142 : | # \%tip_distances = newick_tip_distances( $noderef ) | ||
143 : | # | ||
144 : | golsen | 1.1 | # $xmax = newick_max_X( $noderef ) |
145 : | # ( $tipref, $xmax ) = newick_most_distant_tip_ref( $noderef ) | ||
146 : | golsen | 1.9 | # ( $tipname, $xmax ) = newick_most_distant_tip_name( $noderef ) |
147 : | golsen | 1.1 | # |
148 : | golsen | 1.8 | # Tree tip insertion point (tip is on branch of length x that |
149 : | # is inserted into branch connecting node1 and node2, a distance | ||
150 : | # x1 from node1 and x2 from node2): | ||
151 : | # | ||
152 : | golsen | 1.9 | # [ $node1, $x1, $node2, $x2, $x ] = newick_tip_insertion_point( $tree, $tip ) |
153 : | golsen | 1.8 | # |
154 : | # Standardized label for a node in terms of intersection of 3 lowest sorting | ||
155 : | # tips (sort is lower case): | ||
156 : | # | ||
157 : | golsen | 1.9 | # @TipOrTips = std_node_name( $tree, $node ) |
158 : | golsen | 1.8 | # |
159 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
160 : | # Paths from root of tree: | ||
161 : | #------------------------------------------------------------------------------- | ||
162 : | # | ||
163 : | # Path descriptions are of form: | ||
164 : | # ( $node0, $i0, $node1, $i1, $node2, $i2, ..., $nodeN ) | ||
165 : | # () is returned upon failure | ||
166 : | # | ||
167 : | # @path = path_to_tip( $treenode, $tipname ) | ||
168 : | # @path = path_to_named_node( $treenode, $nodename ) | ||
169 : | # @path = path_to_node_ref( $treenode, $noderef ) | ||
170 : | # | ||
171 : | # @path = path_to_node( $node, $tip1, $tip2, $tip3 ) # 3 tip names | ||
172 : | # @path = path_to_node( $node, [ $tip1, $tip2, $tip3 ] ) # Array of tips | ||
173 : | # @path = path_to_node( $node, $tip1 ) # Use path_to_tip | ||
174 : | # @path = path_to_node( $node, [ $tip1 ] ) # Use path_to_tip | ||
175 : | # | ||
176 : | # $distance = newick_path_length( @path ) | ||
177 : | # $distance = tip_to_tip_distance( $tree, $tip1, $tip2 ) | ||
178 : | # $distance = node_to_node_distance( $tree, $node1, $node2 ) | ||
179 : | # | ||
180 : | # | ||
181 : | #=============================================================================== | ||
182 : | # Tree manipulations: | ||
183 : | #=============================================================================== | ||
184 : | # | ||
185 : | overbeek | 1.4 | # $treecopy = copy_newick_tree( $tree ) |
186 : | golsen | 1.1 | # |
187 : | #------------------------------------------------------------------------------- | ||
188 : | golsen | 1.8 | # The following modify the existing tree, and possibly any components of that |
189 : | golsen | 1.1 | # tree that are reached by reference. If the old version is still needed, copy |
190 : | # before modifying. | ||
191 : | #------------------------------------------------------------------------------- | ||
192 : | # | ||
193 : | # Modify labels: | ||
194 : | # | ||
195 : | # $newtree = newick_relabel_nodes( $node, \%new_name ) | ||
196 : | # $newtree = newick_relabel_nodes_i( $node, \%new_name ) | ||
197 : | # $newtree = newick_relabel_tips( $node, \%new_name ) | ||
198 : | # $newtree = newick_relabel_tips_i( $node, \%new_name ) | ||
199 : | # | ||
200 : | # Modify branches: | ||
201 : | # | ||
202 : | # $n_changed = newick_set_undefined_branches( $node, $x ) | ||
203 : | # $n_changed = newick_set_all_branches( $node, $x ) | ||
204 : | golsen | 1.5 | # $n_changed = newick_fix_negative_branches( $tree ) |
205 : | overbeek | 1.7 | # $node = newick_rescale_branches( $node, $factor ) |
206 : | golsen | 1.1 | # |
207 : | golsen | 1.8 | # Modify comments: |
208 : | # | ||
209 : | # $node = newick_strip_comments( $node ) | ||
210 : | # | ||
211 : | golsen | 1.1 | # Modify rooting and/or order: |
212 : | # | ||
213 : | # $nrmtree = normalize_newick_tree( $tree ) | ||
214 : | # $revtree = reverse_newick_tree( $tree ) | ||
215 : | # $stdtree = std_unrooted_newick( $tree ) | ||
216 : | # $newtree = aesthetic_newick_tree( $tree, $direction ) | ||
217 : | # $rndtree = random_order_newick_tree( $tree ) | ||
218 : | # $newtree = reroot_newick_by_path( @path ) | ||
219 : | # $newtree = reroot_newick_to_tip( $tree, $tip ) | ||
220 : | # $newtree = reroot_newick_next_to_tip( $tree, $tip ) | ||
221 : | # $newtree = reroot_newick_to_node( $tree, @node ) | ||
222 : | # $newtree = reroot_newick_to_node_ref( $tree, $noderef ) | ||
223 : | golsen | 1.9 | # $newtree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction ) |
224 : | golsen | 1.5 | # $newtree = reroot_newick_to_approx_midpoint( $tree ) # unweighted |
225 : | # $newtree = reroot_newick_to_approx_midpoint_w( $tree ) # weight by tips | ||
226 : | golsen | 1.1 | # $newtree = uproot_tip_rooted_newick( $tree ) |
227 : | # $newtree = uproot_newick( $tree ) | ||
228 : | # | ||
229 : | # $newtree = prune_from_newick( $tree, $tip ) | ||
230 : | # $newtree = newick_subtree( $tree, @tips ) | ||
231 : | # $newtree = newick_subtree( $tree, \@tips ) | ||
232 : | # | ||
233 : | golsen | 1.5 | # $newtree = collapse_zero_length_branches( $tree ) |
234 : | # | ||
235 : | golsen | 1.8 | # $node = newick_insert_at_node( $node, $subtree ) |
236 : | # $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction ) | ||
237 : | # | ||
238 : | golsen | 1.1 | #=============================================================================== |
239 : | golsen | 1.9 | # Tree neighborhood: subtree of n tips to represent a larger tree. |
240 : | #=============================================================================== | ||
241 : | # | ||
242 : | # Focus around root: | ||
243 : | # | ||
244 : | # $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority ) | ||
245 : | # $subtree = root_neighborhood_representative_tree( $tree, $n ) | ||
246 : | # @tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority ) | ||
247 : | # @tips = root_neighborhood_representative_tips( $tree, $n ) | ||
248 : | # \@tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority ) | ||
249 : | # \@tips = root_neighborhood_representative_tips( $tree, $n ) | ||
250 : | # | ||
251 : | # Focus around a tip insertion point (the tip is not in the subtree): | ||
252 : | # | ||
253 : | # $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority ) | ||
254 : | # $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n ) | ||
255 : | # @tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority ) | ||
256 : | # @tips = tip_neighborhood_representative_tips( $tree, $tip, $n ) | ||
257 : | # \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority ) | ||
258 : | # \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n ) | ||
259 : | # | ||
260 : | #=============================================================================== | ||
261 : | golsen | 1.1 | # Tree reading and writing: |
262 : | #=============================================================================== | ||
263 : | golsen | 1.9 | # Write machine-readable trees: |
264 : | golsen | 1.1 | # |
265 : | # writeNewickTree( $tree ) | ||
266 : | # writeNewickTree( $tree, $file ) | ||
267 : | overbeek | 1.7 | # writeNewickTree( $tree, \*FH ) |
268 : | # fwriteNewickTree( $file, $tree ) # Matches the C arg list for f... I/O | ||
269 : | golsen | 1.1 | # $treestring = swriteNewickTree( $tree ) |
270 : | # $treestring = formatNewickTree( $tree ) | ||
271 : | golsen | 1.9 | # |
272 : | # Write human-readable trees: | ||
273 : | # | ||
274 : | golsen | 1.1 | # @textlines = text_plot_newick( $node, $width, $min_dx, $dy ) |
275 : | # printer_plot_newick( $node, $file, $width, $min_dx, $dy ) | ||
276 : | # | ||
277 : | golsen | 1.9 | # Read trees: |
278 : | # | ||
279 : | overbeek | 1.7 | # $tree = read_newick_tree( $file ) # reads to a semicolon |
280 : | # @trees = read_newick_trees( $file ) # reads to end of file | ||
281 : | # $tree = parse_newick_tree_str( $string ) | ||
282 : | golsen | 1.1 | # |
283 : | #=============================================================================== | ||
284 : | |||
285 : | |||
286 : | golsen | 1.8 | use Carp; |
287 : | use Data::Dumper; | ||
288 : | golsen | 1.9 | use strict; |
289 : | golsen | 1.8 | |
290 : | golsen | 1.1 | require Exporter; |
291 : | |||
292 : | our @ISA = qw(Exporter); | ||
293 : | our @EXPORT = qw( | ||
294 : | golsen | 1.9 | is_overbeek_tree |
295 : | is_gjonewick_tree | ||
296 : | overbeek | 1.7 | overbeek_to_gjonewick |
297 : | gjonewick_to_overbeek | ||
298 : | golsen | 1.8 | newick_is_valid |
299 : | golsen | 1.1 | newick_is_rooted |
300 : | newick_is_unrooted | ||
301 : | tree_rooted_on_tip | ||
302 : | newick_is_bifurcating | ||
303 : | newick_tip_count | ||
304 : | golsen | 1.9 | newick_tip_ref_list |
305 : | golsen | 1.1 | newick_tip_list |
306 : | golsen | 1.9 | |
307 : | golsen | 1.1 | newick_first_tip |
308 : | newick_duplicated_tips | ||
309 : | newick_tip_in_tree | ||
310 : | newick_shared_tips | ||
311 : | |||
312 : | newick_tree_length | ||
313 : | golsen | 1.9 | newick_tip_distances |
314 : | golsen | 1.1 | newick_max_X |
315 : | newick_most_distant_tip_ref | ||
316 : | newick_most_distant_tip_name | ||
317 : | golsen | 1.9 | |
318 : | newick_tip_insertion_point | ||
319 : | |||
320 : | golsen | 1.1 | std_newick_name |
321 : | |||
322 : | path_to_tip | ||
323 : | path_to_named_node | ||
324 : | path_to_node_ref | ||
325 : | path_to_node | ||
326 : | |||
327 : | newick_path_length | ||
328 : | tip_to_tip_distance | ||
329 : | node_to_node_distance | ||
330 : | |||
331 : | overbeek | 1.4 | copy_newick_tree |
332 : | golsen | 1.1 | |
333 : | newick_relabel_nodes | ||
334 : | newick_relabel_nodes_i | ||
335 : | newick_relabel_tips | ||
336 : | newick_relabel_tips_i | ||
337 : | |||
338 : | newick_set_undefined_branches | ||
339 : | newick_set_all_branches | ||
340 : | golsen | 1.5 | newick_fix_negative_branches |
341 : | overbeek | 1.7 | newick_rescale_branches |
342 : | golsen | 1.1 | |
343 : | golsen | 1.8 | newick_strip_comments |
344 : | |||
345 : | golsen | 1.1 | normalize_newick_tree |
346 : | reverse_newick_tree | ||
347 : | std_unrooted_newick | ||
348 : | aesthetic_newick_tree | ||
349 : | unaesthetic_newick_tree | ||
350 : | random_order_newick_tree | ||
351 : | |||
352 : | reroot_newick_by_path | ||
353 : | reroot_newick_to_tip | ||
354 : | reroot_newick_next_to_tip | ||
355 : | reroot_newick_to_node | ||
356 : | reroot_newick_to_node_ref | ||
357 : | golsen | 1.9 | reroot_newick_between_nodes |
358 : | golsen | 1.1 | reroot_newick_to_approx_midpoint |
359 : | golsen | 1.5 | reroot_newick_to_approx_midpoint_w |
360 : | golsen | 1.1 | uproot_tip_rooted_newick |
361 : | uproot_newick | ||
362 : | |||
363 : | prune_from_newick | ||
364 : | newick_subtree | ||
365 : | golsen | 1.5 | collapse_zero_length_branches |
366 : | golsen | 1.1 | |
367 : | golsen | 1.8 | newick_insert_at_node |
368 : | newick_insert_between_nodes | ||
369 : | |||
370 : | golsen | 1.9 | root_neighborhood_representative_tree |
371 : | root_neighborhood_representative_tips | ||
372 : | tip_neighborhood_representative_tree | ||
373 : | tip_neighborhood_representative_tips | ||
374 : | |||
375 : | golsen | 1.1 | writeNewickTree |
376 : | fwriteNewickTree | ||
377 : | strNewickTree | ||
378 : | formatNewickTree | ||
379 : | overbeek | 1.7 | |
380 : | read_newick_tree | ||
381 : | read_newick_trees | ||
382 : | golsen | 1.1 | parse_newick_tree_str |
383 : | |||
384 : | printer_plot_newick | ||
385 : | text_plot_newick | ||
386 : | ); | ||
387 : | |||
388 : | our @EXPORT_OK = qw( | ||
389 : | newick_desc_ref | ||
390 : | newick_lbl | ||
391 : | newick_x | ||
392 : | newick_c1 | ||
393 : | newick_c2 | ||
394 : | newick_c3 | ||
395 : | newick_c4 | ||
396 : | newick_c5 | ||
397 : | newick_desc_list | ||
398 : | newick_n_desc | ||
399 : | newick_desc_i | ||
400 : | newick_is_tip | ||
401 : | newick_is_valid | ||
402 : | |||
403 : | set_newick_desc_ref | ||
404 : | set_newick_lbl | ||
405 : | set_newick_x | ||
406 : | set_newick_c1 | ||
407 : | set_newick_c2 | ||
408 : | set_newick_c3 | ||
409 : | set_newick_c4 | ||
410 : | set_newick_c5 | ||
411 : | |||
412 : | set_newick_desc_list | ||
413 : | set_newick_desc_i | ||
414 : | |||
415 : | add_to_newick_branch | ||
416 : | dump_tree | ||
417 : | ); | ||
418 : | |||
419 : | |||
420 : | #------------------------------------------------------------------------------- | ||
421 : | # Internally used definitions | ||
422 : | #------------------------------------------------------------------------------- | ||
423 : | |||
424 : | sub array_ref { ref( $_[0] ) eq "ARRAY" } | ||
425 : | sub hash_ref { ref( $_[0] ) eq "HASH" } | ||
426 : | |||
427 : | |||
428 : | #=============================================================================== | ||
429 : | golsen | 1.9 | # Interconvert overbeek and gjonewick trees: |
430 : | overbeek | 1.7 | #=============================================================================== |
431 : | |||
432 : | golsen | 1.9 | sub is_overbeek_tree { array_ref( $_[0] ) && array_ref( $_[0]->[2] ) } |
433 : | |||
434 : | sub is_gjonewick_tree { array_ref( $_[0] ) && array_ref( $_[0]->[0] ) } | ||
435 : | |||
436 : | overbeek | 1.7 | sub overbeek_to_gjonewick |
437 : | { | ||
438 : | return () unless ref( $_[0] ) eq 'ARRAY'; | ||
439 : | my ( $lbl, $x, $desc ) = @{ $_[0] }; | ||
440 : | my ( undef, @desc ) = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : (); | ||
441 : | [ [ map { overbeek_to_gjonewick( $_ ) } @desc ], $lbl, $x ] | ||
442 : | } | ||
443 : | |||
444 : | sub gjonewick_to_overbeek | ||
445 : | { | ||
446 : | return () unless ref( $_[0] ) eq 'ARRAY'; | ||
447 : | my ( $desc, $lbl, $x ) = @{ $_[0] }; | ||
448 : | my @desc = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : (); | ||
449 : | my $parent = $_[1]; | ||
450 : | my $node = [ $lbl, $x, undef, [] ]; | ||
451 : | $node->[2] = [ $parent, map { gjonewick_to_overbeek( $_, $node ) } @desc ]; | ||
452 : | return $node; | ||
453 : | } | ||
454 : | |||
455 : | golsen | 1.9 | |
456 : | overbeek | 1.7 | #=============================================================================== |
457 : | golsen | 1.1 | # Extract tree structure values: |
458 : | #=============================================================================== | ||
459 : | # | ||
460 : | # $listref = newick_desc_ref( $noderef ) | ||
461 : | # $string = newick_lbl( $noderef ) | ||
462 : | # $real = newick_x( $noderef ) | ||
463 : | # $listref = newick_c1( $noderef ) | ||
464 : | # $listref = newick_c2( $noderef ) | ||
465 : | # $listref = newick_c3( $noderef ) | ||
466 : | # $listref = newick_c4( $noderef ) | ||
467 : | # $listref = newick_c5( $noderef ) | ||
468 : | # @list = newick_desc_list( $noderef ) | ||
469 : | # $int = newick_n_desc( $noderef ) | ||
470 : | # $listref = newick_desc_i( $noderef ) | ||
471 : | # $bool = node_is_tip( $noderef ) | ||
472 : | # $bool = node_is_valid( $noderef ) | ||
473 : | # | ||
474 : | #------------------------------------------------------------------------------- | ||
475 : | |||
476 : | sub newick_desc_ref { $_[0]->[0] } # = ${$_[0]}[0] | ||
477 : | golsen | 1.8 | sub newick_lbl { ref($_[0]) ? $_[0]->[1] : Carp::confess() } |
478 : | golsen | 1.1 | sub newick_x { $_[0]->[2] } |
479 : | sub newick_c1 { $_[0]->[3] } | ||
480 : | sub newick_c2 { $_[0]->[4] } | ||
481 : | sub newick_c3 { $_[0]->[5] } | ||
482 : | sub newick_c4 { $_[0]->[6] } | ||
483 : | sub newick_c5 { $_[0]->[7] } | ||
484 : | |||
485 : | sub newick_desc_list { | ||
486 : | my $node = $_[0]; | ||
487 : | ! array_ref( $node ) ? undef : | ||
488 : | array_ref( $node->[0] ) ? @{ $node->[0] } : | ||
489 : | () ; | ||
490 : | } | ||
491 : | |||
492 : | sub newick_n_desc { | ||
493 : | my $node = $_[0]; | ||
494 : | ! array_ref( $node ) ? undef : | ||
495 : | array_ref( $node->[0] ) ? scalar @{ $node->[0] } : | ||
496 : | 0 ; | ||
497 : | } | ||
498 : | |||
499 : | sub newick_desc_i { | ||
500 : | my ( $node, $i ) = @_; | ||
501 : | ! array_ref( $node ) ? undef : | ||
502 : | array_ref( $node->[0] ) ? $node->[0]->[$i-1] : | ||
503 : | undef ; | ||
504 : | } | ||
505 : | |||
506 : | sub node_is_tip { | ||
507 : | my $node = $_[0]; | ||
508 : | ! array_ref( $node ) ? undef : # Not a node ref | ||
509 : | array_ref( $node->[0] ) ? @{ $node->[0] } == 0 : # Empty descend list? | ||
510 : | 1 ; # No descend list | ||
511 : | } | ||
512 : | |||
513 : | sub node_is_valid { # An array ref with nonempty descend list or a label | ||
514 : | my $node = $_[0]; | ||
515 : | array_ref( $node ) && ( array_ref( $node->[0] ) && @{ $node->[0] } | ||
516 : | || defined( $node->[1] ) | ||
517 : | ) | ||
518 : | } | ||
519 : | |||
520 : | |||
521 : | #------------------------------------------------------------------------------- | ||
522 : | # Set tree structure values | ||
523 : | #------------------------------------------------------------------------------- | ||
524 : | |||
525 : | sub set_newick_desc_ref { $_[0]->[0] = $_[1] } | ||
526 : | sub set_newick_lbl { $_[0]->[1] = $_[1] } | ||
527 : | sub set_newick_x { $_[0]->[2] = $_[1] } | ||
528 : | sub set_newick_c1 { $_[0]->[3] = $_[1] } | ||
529 : | sub set_newick_c2 { $_[0]->[4] = $_[1] } | ||
530 : | sub set_newick_c3 { $_[0]->[5] = $_[1] } | ||
531 : | sub set_newick_c4 { $_[0]->[6] = $_[1] } | ||
532 : | sub set_newick_c5 { $_[0]->[7] = $_[1] } | ||
533 : | |||
534 : | sub set_newick_desc_list { | ||
535 : | my $node = shift; | ||
536 : | array_ref( $node ) || return; | ||
537 : | if ( array_ref( $node->[0] ) ) { @{ $node->[0] } = @_ } | ||
538 : | else { $node->[0] = [ @_ ] } | ||
539 : | } | ||
540 : | |||
541 : | sub set_newick_desc_i { | ||
542 : | my ( $node1, $i, $node2 ) = @_; | ||
543 : | array_ref( $node1 ) && array_ref( $node2 ) || return; | ||
544 : | if ( array_ref( $node1->[0] ) ) { $node1->[0]->[$i-1] = $node2 } | ||
545 : | else { $node1->[0] = [ $node2 ] } | ||
546 : | } | ||
547 : | |||
548 : | |||
549 : | #=============================================================================== | ||
550 : | # Some tree property tests: | ||
551 : | #=============================================================================== | ||
552 : | golsen | 1.8 | # Tree is valid? |
553 : | # | ||
554 : | # $bool = newick_is_valid( $node, $verbose ) | ||
555 : | #------------------------------------------------------------------------------- | ||
556 : | sub newick_is_valid | ||
557 : | { | ||
558 : | my $node = shift; | ||
559 : | |||
560 : | if ( ! array_ref( $node ) ) | ||
561 : | { | ||
562 : | print STDERR "Node is not array reference\n" if $_[0]; | ||
563 : | return 0; | ||
564 : | } | ||
565 : | |||
566 : | my @node = @$node; | ||
567 : | if ( ! @node ) | ||
568 : | { | ||
569 : | print STDERR "Node is empty array reference\n" if $_[0]; | ||
570 : | return 0; | ||
571 : | } | ||
572 : | |||
573 : | # Must have descendant or label: | ||
574 : | |||
575 : | if ( ! ( array_ref( $node[0] ) && @{ $node[0] } ) && ! $node[2] ) | ||
576 : | { | ||
577 : | print STDERR "Node has neither descendant nor label\n" if $_[0]; | ||
578 : | return 0; | ||
579 : | } | ||
580 : | |||
581 : | # If comments are present, they must be array references | ||
582 : | |||
583 : | foreach ( ( @node > 3 ) ? @node[ 3 .. $#node ] : () ) | ||
584 : | { | ||
585 : | if ( defined( $_ ) && ! array_ref( $_ ) ) | ||
586 : | { | ||
587 : | print STDERR "Node has neither descendant or label\n" if $_[0]; | ||
588 : | return 0; | ||
589 : | } | ||
590 : | } | ||
591 : | |||
592 : | # Inspect the descendants: | ||
593 : | |||
594 : | foreach ( array_ref( $node[0] ) ? @{ $node[0] } : () ) | ||
595 : | { | ||
596 : | newick_is_valid( $_, @_ ) || return 0 | ||
597 : | } | ||
598 : | |||
599 : | return 1; | ||
600 : | } | ||
601 : | |||
602 : | |||
603 : | #------------------------------------------------------------------------------- | ||
604 : | golsen | 1.1 | # Tree is rooted (2 branches at root node)? |
605 : | # | ||
606 : | # $bool = newick_is_rooted( $node ) | ||
607 : | #------------------------------------------------------------------------------- | ||
608 : | sub newick_is_rooted { | ||
609 : | my $node = $_[0]; | ||
610 : | ! array_ref( $node ) ? undef : # Not a node ref | ||
611 : | array_ref( $node->[0] ) ? @{ $node->[0] } == 2 : # 2 branches | ||
612 : | 0 ; # No descend list | ||
613 : | } | ||
614 : | |||
615 : | |||
616 : | #------------------------------------------------------------------------------- | ||
617 : | # Tree is unrooted (> 2 branches at root node)? | ||
618 : | # | ||
619 : | # $bool = newick_is_unrooted( $node ) | ||
620 : | #------------------------------------------------------------------------------- | ||
621 : | sub newick_is_unrooted { | ||
622 : | my $node = $_[0]; | ||
623 : | ! array_ref( $node ) ? undef : # Not a node ref | ||
624 : | array_ref( $node->[0] ) ? @{ $node->[0] } >= 3 : # Over 2 branches | ||
625 : | 0 ; # No descend list | ||
626 : | } | ||
627 : | |||
628 : | |||
629 : | #------------------------------------------------------------------------------- | ||
630 : | # Tree is rooted on a tip (1 branch at root node)? | ||
631 : | # | ||
632 : | # $bool = newick_is_tip_rooted( $node ) | ||
633 : | #------------------------------------------------------------------------------- | ||
634 : | sub newick_is_tip_rooted { | ||
635 : | my $node = $_[0]; | ||
636 : | ! array_ref( $node ) ? undef : # Not a node ref | ||
637 : | array_ref( $node->[0] ) ? @{ $node->[0] } == 1 : # 1 branch | ||
638 : | 0 ; # No descend list | ||
639 : | } | ||
640 : | |||
641 : | #=============================================================================== | ||
642 : | # Everything below this point refers to parts of the tree structure using | ||
643 : | # only the routines above. | ||
644 : | #=============================================================================== | ||
645 : | # Tree is bifurcating? If so, return number of descendents of root node. | ||
646 : | # | ||
647 : | # $n_desc = newick_is_bifurcating( $node ) | ||
648 : | #------------------------------------------------------------------------------- | ||
649 : | sub newick_is_bifurcating { | ||
650 : | my ( $node, $not_root ) = @_; | ||
651 : | if ( ! array_ref( $node ) ) { return undef } # Bad arg | ||
652 : | |||
653 : | my $n = newick_n_desc( $node ); | ||
654 : | $n == 0 && ! $not_root ? 0 : | ||
655 : | $n == 1 && $not_root ? 0 : | ||
656 : | $n == 3 && $not_root ? 0 : | ||
657 : | $n > 3 ? 0 : | ||
658 : | $n > 2 && ! newick_is_bifurcating( newick_desc_i( $node, 3, 1 ) ) ? 0 : | ||
659 : | $n > 1 && ! newick_is_bifurcating( newick_desc_i( $node, 2, 1 ) ) ? 0 : | ||
660 : | $n > 0 && ! newick_is_bifurcating( newick_desc_i( $node, 1, 1 ) ) ? 0 : | ||
661 : | $n | ||
662 : | } | ||
663 : | |||
664 : | |||
665 : | #------------------------------------------------------------------------------- | ||
666 : | # Number of tips: | ||
667 : | # | ||
668 : | # $n = newick_tip_count( $node ) | ||
669 : | #------------------------------------------------------------------------------- | ||
670 : | sub newick_tip_count { | ||
671 : | my ( $node, $not_root ) = @_; | ||
672 : | |||
673 : | my $imax = newick_n_desc( $node ); | ||
674 : | if ( $imax < 1 ) { return 1 } | ||
675 : | |||
676 : | # Special case for tree rooted on tip | ||
677 : | |||
678 : | my $n = ( $imax == 1 && ( ! $not_root ) ) ? 1 : 0; | ||
679 : | |||
680 : | foreach ( newick_desc_list( $node ) ) { $n += newick_tip_count( $_, 1 ) } | ||
681 : | |||
682 : | $n; | ||
683 : | } | ||
684 : | |||
685 : | |||
686 : | #------------------------------------------------------------------------------- | ||
687 : | # List of tip nodes: | ||
688 : | # | ||
689 : | golsen | 1.9 | # @tips = newick_tip_ref_list( $noderef ) |
690 : | # \@tips = newick_tip_ref_list( $noderef ) | ||
691 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
692 : | sub newick_tip_ref_list { | ||
693 : | my ( $node, $not_root ) = @_; | ||
694 : | |||
695 : | my $imax = newick_n_desc( $node ); | ||
696 : | if ( $imax < 1 ) { return $node } | ||
697 : | |||
698 : | my @list = (); | ||
699 : | |||
700 : | # Tree rooted on tip? | ||
701 : | if ( $imax == 1 && ! $not_root && newick_lbl( $node ) ) { push @list, $node } | ||
702 : | |||
703 : | foreach ( newick_desc_list( $node ) ) { | ||
704 : | push @list, newick_tip_ref_list( $_, 1 ); | ||
705 : | } | ||
706 : | |||
707 : | golsen | 1.9 | wantarray ? @list : \@list; |
708 : | golsen | 1.1 | } |
709 : | |||
710 : | |||
711 : | #------------------------------------------------------------------------------- | ||
712 : | # List of tips: | ||
713 : | # | ||
714 : | # @tips = newick_tip_list( $node ) | ||
715 : | golsen | 1.9 | # \@tips = newick_tip_list( $node ) |
716 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
717 : | sub newick_tip_list { | ||
718 : | golsen | 1.9 | my @tips = map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] ); |
719 : | wantarray ? @tips : \@tips; | ||
720 : | golsen | 1.1 | } |
721 : | |||
722 : | |||
723 : | #------------------------------------------------------------------------------- | ||
724 : | # First tip node in tree: | ||
725 : | # | ||
726 : | # $tipref = newick_first_tip_ref( $node ) | ||
727 : | #------------------------------------------------------------------------------- | ||
728 : | sub newick_first_tip_ref { | ||
729 : | my ( $node, $not_root ) = @_; | ||
730 : | valid_node( $node ) || return undef; | ||
731 : | |||
732 : | # Arrived at tip, or start of a tip-rooted tree? | ||
733 : | my $n = newick_n_desc( $node ); | ||
734 : | if ( ( $n < 1 ) || ( $n == 1 && ! $not_root ) ) { return $node } | ||
735 : | |||
736 : | newick_first_tip_ref( newick_desc_i( $node, 1 ), 1 ); | ||
737 : | } | ||
738 : | |||
739 : | |||
740 : | #------------------------------------------------------------------------------- | ||
741 : | # First tip name in tree: | ||
742 : | # | ||
743 : | # $tip = newick_first_tip( $node ) | ||
744 : | #------------------------------------------------------------------------------- | ||
745 : | sub newick_first_tip { | ||
746 : | my ( $noderef ) = @_; | ||
747 : | |||
748 : | my $tipref; | ||
749 : | array_ref( $tipref = newick_first_tip_ref( $noderef ) ) ? newick_lbl( $tipref ) | ||
750 : | golsen | 1.9 | : undef; |
751 : | golsen | 1.1 | } |
752 : | |||
753 : | |||
754 : | #------------------------------------------------------------------------------- | ||
755 : | # List of duplicated tip labels. | ||
756 : | # | ||
757 : | # @tips = newick_duplicated_tips( $node ) | ||
758 : | golsen | 1.9 | # \@tips = newick_duplicated_tips( $node ) |
759 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
760 : | sub newick_duplicated_tips { | ||
761 : | golsen | 1.9 | my @tips = &duplicates( newick_tip_list( $_[0] ) ); |
762 : | wantarray ? @tips : \@tips; | ||
763 : | golsen | 1.1 | } |
764 : | |||
765 : | |||
766 : | #------------------------------------------------------------------------------- | ||
767 : | # Tip in tree? | ||
768 : | # | ||
769 : | # $bool = newick_tip_in_tree( $node, $tipname ) | ||
770 : | #------------------------------------------------------------------------------- | ||
771 : | sub newick_tip_in_tree { | ||
772 : | my ( $node, $tip, $not_root ) = @_; | ||
773 : | |||
774 : | my $n = newick_n_desc( $node ); | ||
775 : | if ( $n < 1 ) { return ( newick_lbl( $node ) eq $tip) ? 1 : 0 } | ||
776 : | |||
777 : | # Special case for tree rooted on tip | ||
778 : | |||
779 : | if ( $n == 1 && ( ! $not_root ) && newick_lbl( $node ) eq $tip ) { return 1 } | ||
780 : | |||
781 : | foreach ( newick_desc_list( $node ) ) { | ||
782 : | if ( newick_tip_in_tree( $_, $tip, 1 ) ) { return 1 } | ||
783 : | } | ||
784 : | |||
785 : | 0; # Fall through means not found | ||
786 : | } | ||
787 : | |||
788 : | |||
789 : | #------------------------------------------------------------------------------- | ||
790 : | # Tips shared between 2 trees. | ||
791 : | # | ||
792 : | # @tips = newick_shared_tips( $tree1, $tree2 ) | ||
793 : | golsen | 1.9 | # \@tips = newick_shared_tips( $tree1, $tree2 ) |
794 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
795 : | sub newick_shared_tips { | ||
796 : | golsen | 1.9 | my ( $tree1, $tree2 ) = @_; |
797 : | my $tips1 = newick_tip_list( $tree1 ); | ||
798 : | my $tips2 = newick_tip_list( $tree2 ); | ||
799 : | my @tips = &intersection( $tips1, $tips2 ); | ||
800 : | wantarray ? @tips : \@tips; | ||
801 : | golsen | 1.1 | } |
802 : | |||
803 : | |||
804 : | #------------------------------------------------------------------------------- | ||
805 : | # Tree length. | ||
806 : | # | ||
807 : | # $length = newick_tree_length( $node ) | ||
808 : | #------------------------------------------------------------------------------- | ||
809 : | sub newick_tree_length { | ||
810 : | my ( $node, $not_root ) = @_; | ||
811 : | |||
812 : | my $x = $not_root ? newick_x( $node ) : 0; | ||
813 : | defined( $x ) || ( $x = 1 ); # Convert undefined to 1 | ||
814 : | |||
815 : | foreach ( newick_desc_list( $node ) ) { $x += newick_tree_length( $_, 1 ) } | ||
816 : | |||
817 : | $x; | ||
818 : | } | ||
819 : | |||
820 : | |||
821 : | #------------------------------------------------------------------------------- | ||
822 : | golsen | 1.9 | # Hash of tip nodes and corresponding distances from root: |
823 : | # | ||
824 : | # %tip_distances = newick_tip_distances( $node ) | ||
825 : | # \%tip_distances = newick_tip_distances( $node ) | ||
826 : | #------------------------------------------------------------------------------- | ||
827 : | sub newick_tip_distances | ||
828 : | { | ||
829 : | my ( $node, $x, $hash ) = @_; | ||
830 : | my $root = ! $hash; | ||
831 : | ref( $hash ) eq 'HASH' or $hash = {}; | ||
832 : | |||
833 : | $x ||= 0; | ||
834 : | $x += newick_x( $node ) || 0; | ||
835 : | |||
836 : | # Is it a tip? | ||
837 : | |||
838 : | my $n_desc = newick_n_desc( $node ); | ||
839 : | if ( ! $n_desc ) | ||
840 : | { | ||
841 : | $hash->{ newick_lbl( $node ) } = $x; | ||
842 : | return $hash; | ||
843 : | } | ||
844 : | |||
845 : | # Tree rooted on tip? | ||
846 : | |||
847 : | if ( ( $n_desc == 1 ) && $root && ( newick_lbl( $node ) ) ) | ||
848 : | { | ||
849 : | $hash->{ newick_lbl( $node ) } = 0; # Distance to root is zero | ||
850 : | } | ||
851 : | |||
852 : | foreach ( newick_desc_list( $node ) ) | ||
853 : | { | ||
854 : | newick_tip_distances( $_, $x, $hash ); | ||
855 : | } | ||
856 : | |||
857 : | wantarray ? %$hash : $hash; | ||
858 : | } | ||
859 : | |||
860 : | |||
861 : | #------------------------------------------------------------------------------- | ||
862 : | golsen | 1.1 | # Tree max X. |
863 : | # | ||
864 : | # $xmax = newick_max_X( $node ) | ||
865 : | #------------------------------------------------------------------------------- | ||
866 : | sub newick_max_X { | ||
867 : | my ( $node, $not_root ) = @_; | ||
868 : | |||
869 : | my $xmax = 0; | ||
870 : | foreach ( newick_desc_list( $node ) ) { | ||
871 : | my $x = newick_max_X( $_, 1 ); | ||
872 : | if ( $x > $xmax ) { $xmax = $x } | ||
873 : | } | ||
874 : | |||
875 : | my $x = $not_root ? newick_x( $node ) : 0; | ||
876 : | $xmax + ( defined( $x ) ? $x : 1 ); # Convert undefined to 1 | ||
877 : | } | ||
878 : | |||
879 : | |||
880 : | #------------------------------------------------------------------------------- | ||
881 : | # Most distant tip from root: distance and path. | ||
882 : | # | ||
883 : | # ( $xmax, @path ) = newick_most_distant_tip_path( $tree ) | ||
884 : | #------------------------------------------------------------------------------- | ||
885 : | sub newick_most_distant_tip_path { | ||
886 : | my ( $node, $not_root ) = @_; | ||
887 : | |||
888 : | my $imax = newick_n_desc( $node ); | ||
889 : | my $xmax = ( $imax > 0 ) ? -1 : 0; | ||
890 : | my @pmax = (); | ||
891 : | for ( my $i = 1; $i <= $imax; $i++ ) { | ||
892 : | my ( $x, @path ) = newick_most_distant_tip_path( newick_desc_i( $node, $i ), 1 ); | ||
893 : | if ( $x > $xmax ) { $xmax = $x; @pmax = ( $i, @path ) } | ||
894 : | } | ||
895 : | |||
896 : | my $x = $not_root ? newick_x( $node ) : 0; | ||
897 : | $xmax += defined( $x ) ? $x : 0; # Convert undefined to 1 | ||
898 : | ( $xmax, $node, @pmax ); | ||
899 : | } | ||
900 : | |||
901 : | |||
902 : | #------------------------------------------------------------------------------- | ||
903 : | # Most distant tip from root, and its distance. | ||
904 : | # | ||
905 : | # ( $tipref, $xmax ) = newick_most_distant_tip_ref( $tree ) | ||
906 : | #------------------------------------------------------------------------------- | ||
907 : | sub newick_most_distant_tip_ref { | ||
908 : | my ( $node, $not_root ) = @_; | ||
909 : | |||
910 : | my $imax = newick_n_desc( $node ); | ||
911 : | my $xmax = ( $imax > 0 ) ? -1 : 0; | ||
912 : | my $tmax = $node; | ||
913 : | foreach ( newick_desc_list( $node ) ) { | ||
914 : | my ( $t, $x ) = newick_most_distant_tip_ref( $_, 1 ); | ||
915 : | if ( $x > $xmax ) { $xmax = $x; $tmax = $t } | ||
916 : | } | ||
917 : | |||
918 : | my $x = $not_root ? newick_x( $node ) : 0; | ||
919 : | $xmax += defined( $x ) ? $x : 1; # Convert undefined to 1 | ||
920 : | ( $tmax, $xmax ); | ||
921 : | } | ||
922 : | |||
923 : | |||
924 : | #------------------------------------------------------------------------------- | ||
925 : | # Name of most distant tip from root, and its distance. | ||
926 : | # | ||
927 : | # ( $tipname, $xmax ) = newick_most_distant_tip_name( $tree ) | ||
928 : | #------------------------------------------------------------------------------- | ||
929 : | sub newick_most_distant_tip_name { | ||
930 : | my ( $tipref, $xmax ) = newick_most_distant_tip_ref( $_[0] ); | ||
931 : | ( newick_lbl( $tipref ), $xmax ) | ||
932 : | } | ||
933 : | |||
934 : | |||
935 : | #------------------------------------------------------------------------------- | ||
936 : | golsen | 1.8 | # Tree tip insertion point (with standard node labels): |
937 : | # | ||
938 : | # [ $node1, $x1, $node2, $x2, $x ] | ||
939 : | # = newick_tip_insertion_point( $tree, $tip ) | ||
940 : | # | ||
941 : | # Which means: tip is on a branch of length x that is inserted into the branch | ||
942 : | # connecting node1 and node2, at distance x1 from node1 and x2 from node2. | ||
943 : | # | ||
944 : | # x1 +------ n1a (lowest sorting tip of this subtree) | ||
945 : | # +--------n1 | ||
946 : | # | +------n1b (lowest sorting tip of this subtree) | ||
947 : | # tip-------n | ||
948 : | # x | +------------- n2a (lowest sorting tip of this subtree) | ||
949 : | # +------n2 | ||
950 : | # x2 +-------- n2b (lowest sorting tip of this subtree) | ||
951 : | # | ||
952 : | # The designations of 1 vs 2, and a vs b are chosen such that: | ||
953 : | # n1a < n1b, and n2a < n2b, and n1a < n2a | ||
954 : | # | ||
955 : | # Then the statandard description becomes: | ||
956 : | # | ||
957 : | # [ [ $n1a, min(n1b,n2a), max(n1b,n2a) ], x1, | ||
958 : | # [ $n2a, min(n2b,n1a), max(n2b,n1a) ], x2, | ||
959 : | # x | ||
960 : | # ] | ||
961 : | # | ||
962 : | #------------------------------------------------------------------------------- | ||
963 : | sub newick_tip_insertion_point | ||
964 : | { | ||
965 : | my ( $tree, $tip ) = @_; | ||
966 : | $tree && $tip && ref( $tree ) eq 'ARRAY' or return undef; | ||
967 : | $tree = copy_newick_tree( $tree ) or return undef; | ||
968 : | $tree = reroot_newick_to_tip( $tree, $tip ) or return undef; | ||
969 : | my $node = $tree; | ||
970 : | |||
971 : | my $x = 0; # Distance to node | ||
972 : | my $dl = newick_desc_ref( $node ); # Descendent list of tip node; | ||
973 : | $node = $dl->[0]; # Node adjacent to tip | ||
974 : | $dl = newick_desc_ref( $node ); | ||
975 : | while ( $dl && ( @$dl == 1 ) ) # Traverse unbranched nodes | ||
976 : | { | ||
977 : | $node = $dl->[0]; | ||
978 : | $x += newick_x( $node ); | ||
979 : | $dl = newick_desc_ref( $node ); | ||
980 : | } | ||
981 : | $x += newick_x( $node ); | ||
982 : | |||
983 : | # We are now at the node that is the insertion point. | ||
984 : | # Is it a tip? | ||
985 : | |||
986 : | my @description; | ||
987 : | |||
988 : | if ( ( ! $dl ) || @$dl == 0 ) | ||
989 : | { | ||
990 : | @description = ( [ newick_lbl( $node ) ], 0, undef, 0, $x ); | ||
991 : | } | ||
992 : | |||
993 : | # Is it a trifurcation or greater, in which case it does not go | ||
994 : | # away with tip deletion? | ||
995 : | |||
996 : | elsif ( @$dl > 2 ) | ||
997 : | { | ||
998 : | @description = ( [ std_node_name( $node, $node ) ], 0, undef, 0, $x ); | ||
999 : | } | ||
1000 : | |||
1001 : | # The node is bifurcating. We need to describe it. | ||
1002 : | |||
1003 : | else | ||
1004 : | { | ||
1005 : | golsen | 1.9 | my ( $n1, $x1 ) = describe_descendant( $dl->[0] ); |
1006 : | my ( $n2, $x2 ) = describe_descendant( $dl->[1] ); | ||
1007 : | golsen | 1.8 | |
1008 : | if ( @$n1 == 2 ) { push @$n1, $n2->[0] } | ||
1009 : | if ( @$n2 == 2 ) | ||
1010 : | { | ||
1011 : | @$n2 = sort { lc $a cmp lc $b } ( @$n2, $n1->[0] ); | ||
1012 : | } | ||
1013 : | if ( @$n1 == 3 ) { @$n2 = sort { lc $a cmp lc $b } @$n2 } | ||
1014 : | @description = ( $n1, $x1, $n2, $x2, $x ); | ||
1015 : | } | ||
1016 : | |||
1017 : | return wantarray ? @description : \@description; | ||
1018 : | } | ||
1019 : | |||
1020 : | |||
1021 : | golsen | 1.9 | sub describe_descendant |
1022 : | golsen | 1.8 | { |
1023 : | my $node = shift; | ||
1024 : | |||
1025 : | my $x = 0; # Distance to node | ||
1026 : | my $dl = newick_desc_ref( $node ); # Descendent list of tip node; | ||
1027 : | while ( $dl && ( @$dl == 1 ) ) # Traverse unbranched nodes | ||
1028 : | { | ||
1029 : | $node = $dl->[0]; | ||
1030 : | golsen | 1.9 | $x += newick_x( $node ); |
1031 : | golsen | 1.8 | $dl = newick_desc_ref( $node ); |
1032 : | } | ||
1033 : | $x += newick_x( $node ); | ||
1034 : | |||
1035 : | # Is it a tip? Return list of one tip; | ||
1036 : | |||
1037 : | if ( ( ! $dl ) || @$dl == 0 ) | ||
1038 : | { | ||
1039 : | return ( [ newick_lbl( $node ) ], $x ); | ||
1040 : | } | ||
1041 : | |||
1042 : | # Get tips of each descendent, keeping lowest sorting from each. | ||
1043 : | # Return the two lowest of those (the third will come from the | ||
1044 : | # other side of the original node). | ||
1045 : | |||
1046 : | golsen | 1.9 | my @rep_tips = sort { lc $a cmp lc $b } |
1047 : | map { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] } | ||
1048 : | @$dl; | ||
1049 : | return ( [ @rep_tips[0,1] ], $x ); | ||
1050 : | golsen | 1.8 | } |
1051 : | |||
1052 : | |||
1053 : | #------------------------------------------------------------------------------- | ||
1054 : | golsen | 1.1 | # Standard node name: |
1055 : | # Tip label if at a tip | ||
1056 : | # Three sorted tip labels intersecting at node, each being smallest | ||
1057 : | # of all the tips of their subtrees | ||
1058 : | # | ||
1059 : | golsen | 1.9 | # @TipOrTips = std_node_name( $tree, $node ) |
1060 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
1061 : | sub std_node_name { | ||
1062 : | my $tree = $_[0]; | ||
1063 : | |||
1064 : | # Node reference is last element of path to node | ||
1065 : | |||
1066 : | my $noderef = ( path_to_node( @_ ) )[-1]; | ||
1067 : | defined( $noderef ) || return (); | ||
1068 : | |||
1069 : | if ( node_is_tip( $noderef ) || $noderef eq $tree ) { # Is it a tip? | ||
1070 : | return newick_lbl( $noderef ); | ||
1071 : | } | ||
1072 : | |||
1073 : | # Work through lists of tips in descendant subtrees, removing them from | ||
1074 : | # @rest, and keeping the best tip for each subtree. | ||
1075 : | |||
1076 : | golsen | 1.8 | my @rest = newick_tip_list( $tree ); |
1077 : | golsen | 1.9 | my @best = map |
1078 : | { | ||
1079 : | golsen | 1.8 | my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ ); |
1080 : | golsen | 1.9 | @rest = &set_difference( \@rest, \@tips ); |
1081 : | golsen | 1.1 | $tips[0]; |
1082 : | golsen | 1.9 | } newick_desc_list( $noderef ); |
1083 : | golsen | 1.1 | |
1084 : | # Best of the rest of the tree | ||
1085 : | push @best, ( sort { lc $a cmp lc $b } @rest )[0]; | ||
1086 : | |||
1087 : | # Take the top 3, in order: | ||
1088 : | |||
1089 : | ( @best >= 3 ) ? ( sort { lc $a cmp lc $b } @best )[0 .. 2] : (); | ||
1090 : | } | ||
1091 : | |||
1092 : | |||
1093 : | #=============================================================================== | ||
1094 : | # Functions to find paths in trees. | ||
1095 : | # | ||
1096 : | # Path descriptions are of form: | ||
1097 : | # ( $node0, $i0, $node1, $i1, $node2, $i2, ..., $nodeN ) # Always odd | ||
1098 : | # () is returned upon failure | ||
1099 : | # | ||
1100 : | # Numbering of descendants is 1-based. | ||
1101 : | #=============================================================================== | ||
1102 : | # Path to tip: | ||
1103 : | # | ||
1104 : | # @path = path_to_tip( $treenode, $tipname ) | ||
1105 : | #------------------------------------------------------------------------------- | ||
1106 : | sub path_to_tip { | ||
1107 : | my ( $node, $tip, @path0 ) = @_; | ||
1108 : | |||
1109 : | push @path0, $node; | ||
1110 : | my $imax = newick_n_desc( $node ); | ||
1111 : | if ( $imax < 1 ) { return ( newick_lbl( $node ) eq $tip ) ? @path0 : () } | ||
1112 : | |||
1113 : | # Special case for tree rooted on tip | ||
1114 : | |||
1115 : | if ( ( $imax == 1 ) # One descendant | ||
1116 : | && ( @path0 == 1 ) # First step in path | ||
1117 : | && ( newick_lbl( $node ) eq $tip ) # Label matches | ||
1118 : | ) { return @path0 } | ||
1119 : | |||
1120 : | my @path; | ||
1121 : | for (my $i = 1; $i <= $imax; $i++ ) { | ||
1122 : | @path = path_to_tip( newick_desc_i( $node, $i ), $tip, ( @path0, $i ) ); | ||
1123 : | if ( @path ) { return @path } | ||
1124 : | } | ||
1125 : | |||
1126 : | (); # Not found | ||
1127 : | } | ||
1128 : | |||
1129 : | |||
1130 : | #------------------------------------------------------------------------------- | ||
1131 : | # Path to named node. | ||
1132 : | # Like path to tip, but will find named internal nodes as well. | ||
1133 : | # | ||
1134 : | # @path = path_to_named_node( $treenode, $name ) | ||
1135 : | #------------------------------------------------------------------------------- | ||
1136 : | sub path_to_named_node { | ||
1137 : | my ( $node, $name, @path0 ) = @_; | ||
1138 : | |||
1139 : | push @path0, $node; | ||
1140 : | if ( newick_lbl( $node ) eq $name ) { return @path0 } | ||
1141 : | |||
1142 : | my @path; | ||
1143 : | my $imax = newick_n_desc( $node ); | ||
1144 : | for ( my $i = 1; $i <= $imax; $i++ ) { | ||
1145 : | @path = path_to_named_node( newick_desc_i( $node, $i ), $name, ( @path0, $i ) ); | ||
1146 : | if ( @path ) { return @path } | ||
1147 : | } | ||
1148 : | |||
1149 : | (); # Not found | ||
1150 : | } | ||
1151 : | |||
1152 : | |||
1153 : | #------------------------------------------------------------------------------- | ||
1154 : | # Path to node reference. | ||
1155 : | # | ||
1156 : | # @path = path_to_node_ref( $treenode, $noderef ) | ||
1157 : | #------------------------------------------------------------------------------- | ||
1158 : | sub path_to_node_ref { | ||
1159 : | my ( $node, $noderef, @path0 ) = @_; | ||
1160 : | |||
1161 : | push @path0, $node; | ||
1162 : | if ( $node eq $noderef ) { return @path0 } | ||
1163 : | |||
1164 : | my @path; | ||
1165 : | my $imax = newick_n_desc( $node ); | ||
1166 : | for ( my $i = 1; $i <= $imax; $i++ ) { | ||
1167 : | @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) ); | ||
1168 : | if ( @path ) { return @path } | ||
1169 : | } | ||
1170 : | |||
1171 : | (); # Not found | ||
1172 : | } | ||
1173 : | |||
1174 : | |||
1175 : | #------------------------------------------------------------------------------- | ||
1176 : | # Path to node, as defined by 1 or 3 tips. | ||
1177 : | # | ||
1178 : | # @path = path_to_node( $node, $tip1, $tip2, $tip3 ) # 3 tip names | ||
1179 : | # @path = path_to_node( $node, [ $tip1, $tip2, $tip3 ] ) # Allow array ref | ||
1180 : | # @path = path_to_node( $node, $tip1 ) # Use path_to_tip | ||
1181 : | # @path = path_to_node( $node, [ $tip1 ] ) # Use path_to_tip | ||
1182 : | #------------------------------------------------------------------------------- | ||
1183 : | sub path_to_node { | ||
1184 : | my ( $node, $tip1, $tip2, $tip3 ) = @_; | ||
1185 : | array_ref( $node ) && defined( $tip1 ) || return (); | ||
1186 : | |||
1187 : | # Allow arg 2 to be an array reference | ||
1188 : | if ( array_ref( $tip1 ) ) { ( $tip1, $tip2, $tip3 ) = @$tip1 } | ||
1189 : | |||
1190 : | my @p1 = path_to_tip( $node, $tip1 ); # Path to first tip | ||
1191 : | @p1 || return (); # Was the tip found? | ||
1192 : | defined( $tip2 ) && defined( $tip3 ) || return @p1; # Two more defined? | ||
1193 : | |||
1194 : | my @p2 = path_to_tip( $node, $tip2 ); | ||
1195 : | my @p3 = path_to_tip( $node, $tip3 ); | ||
1196 : | @p2 && @p3 || return (); # Were they found? | ||
1197 : | |||
1198 : | # Find the common prefix for each pair of paths | ||
1199 : | golsen | 1.9 | my @p12 = &common_prefix( \@p1, \@p2 ); |
1200 : | my @p13 = &common_prefix( \@p1, \@p3 ); | ||
1201 : | my @p23 = &common_prefix( \@p2, \@p3 ); | ||
1202 : | golsen | 1.1 | |
1203 : | # Return the longest common prefix of any two paths | ||
1204 : | ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 : | ||
1205 : | ( @p13 >= @p23 ) ? @p13 : | ||
1206 : | @p23 ; | ||
1207 : | } | ||
1208 : | |||
1209 : | |||
1210 : | #------------------------------------------------------------------------------- | ||
1211 : | # Distance along path. | ||
1212 : | # | ||
1213 : | # $distance = newick_path_length( @path ) | ||
1214 : | #------------------------------------------------------------------------------- | ||
1215 : | sub newick_path_length { | ||
1216 : | my $node = shift; # Discard the first node | ||
1217 : | array_ref( $node ) || return undef; | ||
1218 : | @_ ? distance_along_path_2( @_ ) : 0; | ||
1219 : | } | ||
1220 : | |||
1221 : | |||
1222 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1223 : | # This expects to get path minus root node: | ||
1224 : | # | ||
1225 : | # $distance = distance_along_path_2( @path ) | ||
1226 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1227 : | sub distance_along_path_2 { | ||
1228 : | shift; # Discard descendant number | ||
1229 : | my $node = shift; | ||
1230 : | array_ref( $node ) || return undef; | ||
1231 : | my $d1 = newick_x( $node ); | ||
1232 : | golsen | 1.8 | my $d2 = @_ ? distance_along_path_2( @_ ) : 0; |
1233 : | golsen | 1.1 | defined( $d1 ) && defined( $d2 ) ? $d1 + $d2 : undef; |
1234 : | } | ||
1235 : | |||
1236 : | |||
1237 : | #------------------------------------------------------------------------------- | ||
1238 : | # Tip-to-tip distance. | ||
1239 : | # | ||
1240 : | # $distance = tip_to_tip_distance( $tree, $tip1, $tip2 ) | ||
1241 : | #------------------------------------------------------------------------------- | ||
1242 : | sub tip_to_tip_distance { | ||
1243 : | my ( $node, $tip1, $tip2 ) = @_; | ||
1244 : | |||
1245 : | array_ref( $node ) && defined( $tip1 ) | ||
1246 : | && defined( $tip2 ) || return undef; | ||
1247 : | my @p1 = path_to_tip( $node, $tip1 ); | ||
1248 : | my @p2 = path_to_tip( $node, $tip2 ); | ||
1249 : | @p1 && @p2 || return undef; # Were they found? | ||
1250 : | |||
1251 : | # Find the unique suffixes of the two paths | ||
1252 : | golsen | 1.9 | my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost |
1253 : | golsen | 1.1 | my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0; |
1254 : | my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0; | ||
1255 : | |||
1256 : | defined( $d1 ) && defined( $d2 ) ? $d1 + $d2 : undef; | ||
1257 : | } | ||
1258 : | |||
1259 : | |||
1260 : | #------------------------------------------------------------------------------- | ||
1261 : | # Node-to-node distance. | ||
1262 : | # Nodes can be: $tipname | ||
1263 : | # [ $tipname ] | ||
1264 : | # [ $tipname1, $tipname2, $tipname3 ] | ||
1265 : | # | ||
1266 : | # $distance = node_to_node_distance( $tree, $node1, $node2 ) | ||
1267 : | #------------------------------------------------------------------------------- | ||
1268 : | sub node_to_node_distance { | ||
1269 : | my ( $node, $node1, $node2 ) = @_; | ||
1270 : | |||
1271 : | array_ref( $node ) && defined( $node1 ) | ||
1272 : | && defined( $node2 ) || return undef; | ||
1273 : | golsen | 1.8 | my @p1 = path_to_node( $node, $node1 ) or return undef; |
1274 : | my @p2 = path_to_node( $node, $node2 ) or return undef; | ||
1275 : | golsen | 1.1 | |
1276 : | # Find the unique suffixes of the two paths | ||
1277 : | golsen | 1.9 | my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost |
1278 : | golsen | 1.1 | my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0; |
1279 : | my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0; | ||
1280 : | |||
1281 : | defined( $d1 ) && defined( $d2 ) ? $d1 + $d2 : undef; | ||
1282 : | } | ||
1283 : | |||
1284 : | |||
1285 : | #=============================================================================== | ||
1286 : | # Tree manipulations: | ||
1287 : | #=============================================================================== | ||
1288 : | # Copy tree. | ||
1289 : | # Lists are copied, except that references to empty lists go to undef. | ||
1290 : | # Only defined fields are added, so tree list may be shorter than 8 fields. | ||
1291 : | # | ||
1292 : | overbeek | 1.4 | # $treecopy = copy_newick_tree( $tree ) |
1293 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
1294 : | overbeek | 1.4 | sub copy_newick_tree { |
1295 : | golsen | 1.1 | my ( $node ) = @_; |
1296 : | array_ref( $node ) || return undef; | ||
1297 : | |||
1298 : | my $nn = []; # Reference to a new node structure | ||
1299 : | # Build a new descendant list, if not empty | ||
1300 : | my @dl = newick_desc_list( $node ); | ||
1301 : | overbeek | 1.4 | set_newick_desc_ref( $nn, @dl ? [ map { copy_newick_tree( $_ ) } @dl ] |
1302 : | golsen | 1.1 | : undef |
1303 : | ); | ||
1304 : | |||
1305 : | # Copy label and x, if defined | ||
1306 : | my ( $l, $x ); | ||
1307 : | if ( defined( $l = newick_lbl( $node ) ) ) { set_newick_lbl( $nn, $l ) } | ||
1308 : | if ( defined( $x = newick_x( $node ) ) ) { set_newick_x( $nn, $x ) } | ||
1309 : | |||
1310 : | # Build new comment lists, when not empty ( does not extend array unless | ||
1311 : | # necessary) | ||
1312 : | my $c; | ||
1313 : | if ( $c = newick_c1( $node ) and @$c ) { set_newick_c1( $nn, [ @$c ] ) } | ||
1314 : | if ( $c = newick_c2( $node ) and @$c ) { set_newick_c2( $nn, [ @$c ] ) } | ||
1315 : | if ( $c = newick_c3( $node ) and @$c ) { set_newick_c3( $nn, [ @$c ] ) } | ||
1316 : | if ( $c = newick_c4( $node ) and @$c ) { set_newick_c4( $nn, [ @$c ] ) } | ||
1317 : | if ( $c = newick_c5( $node ) and @$c ) { set_newick_c5( $nn, [ @$c ] ) } | ||
1318 : | |||
1319 : | $nn; | ||
1320 : | } | ||
1321 : | |||
1322 : | |||
1323 : | #------------------------------------------------------------------------------- | ||
1324 : | # Use a hash to relabel the nodes in a newick tree. | ||
1325 : | # | ||
1326 : | # $newtree = newick_relabel_nodes( $node, \%new_name ) | ||
1327 : | #------------------------------------------------------------------------------- | ||
1328 : | sub newick_relabel_nodes { | ||
1329 : | my ( $node, $new_name ) = @_; | ||
1330 : | |||
1331 : | my ( $lbl, $new ); | ||
1332 : | if ( defined( $lbl = newick_lbl( $node ) ) | ||
1333 : | && ( $lbl ne "" ) | ||
1334 : | && defined( $new = $new_name->{ $lbl } ) | ||
1335 : | ) { | ||
1336 : | set_newick_lbl( $node, $new ); | ||
1337 : | } | ||
1338 : | |||
1339 : | foreach ( newick_desc_list( $node ) ) { | ||
1340 : | newick_relabel_nodes( $_, $new_name ); | ||
1341 : | } | ||
1342 : | |||
1343 : | $node; | ||
1344 : | } | ||
1345 : | |||
1346 : | |||
1347 : | #------------------------------------------------------------------------------- | ||
1348 : | # Use a hash to relabel the nodes in a newick tree (case insensitive). | ||
1349 : | # | ||
1350 : | # $newtree = newick_relabel_nodes_i( $node, \%new_name ) | ||
1351 : | #------------------------------------------------------------------------------- | ||
1352 : | sub newick_relabel_nodes_i { | ||
1353 : | my ( $node, $new_name ) = @_; | ||
1354 : | |||
1355 : | # Add any necessary lowercase keys to the hash: | ||
1356 : | |||
1357 : | my $lc_lbl; | ||
1358 : | foreach ( keys %$new_name ) { | ||
1359 : | $lc_lbl = lc $_; | ||
1360 : | ( $lc_lbl eq $_ ) or ( $new_name->{ $lc_lbl } = $new_name->{ $_ } ); | ||
1361 : | } | ||
1362 : | |||
1363 : | newick_relabel_nodes_i2( $node, $new_name ); | ||
1364 : | } | ||
1365 : | |||
1366 : | |||
1367 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1368 : | # Do the actual relabeling | ||
1369 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1370 : | sub newick_relabel_nodes_i2 { | ||
1371 : | my ( $node, $new_name ) = @_; | ||
1372 : | |||
1373 : | my ( $lbl, $new ); | ||
1374 : | if ( defined( $lbl = newick_lbl( $node ) ) | ||
1375 : | && ( $lbl ne "" ) | ||
1376 : | && defined( $new = $new_name->{ lc $lbl } ) | ||
1377 : | ) { | ||
1378 : | set_newick_lbl( $node, $new ); | ||
1379 : | } | ||
1380 : | |||
1381 : | foreach ( newick_desc_list( $node ) ) { | ||
1382 : | newick_relabel_nodes_i2( $_, $new_name ); | ||
1383 : | } | ||
1384 : | |||
1385 : | $node; | ||
1386 : | } | ||
1387 : | |||
1388 : | |||
1389 : | #------------------------------------------------------------------------------- | ||
1390 : | # Use a hash to relabel the tips in a newick tree. | ||
1391 : | # | ||
1392 : | # $newtree = newick_relabel_tips( $node, \%new_name ) | ||
1393 : | #------------------------------------------------------------------------------- | ||
1394 : | sub newick_relabel_tips { | ||
1395 : | my ( $node, $new_name ) = @_; | ||
1396 : | |||
1397 : | my @desc = newick_desc_list( $node ); | ||
1398 : | |||
1399 : | if ( @desc ) { | ||
1400 : | foreach ( @desc ) { newick_relabel_tips( $_, $new_name ) } | ||
1401 : | } | ||
1402 : | else { | ||
1403 : | my ( $lbl, $new ); | ||
1404 : | if ( defined( $lbl = newick_lbl( $node ) ) | ||
1405 : | && ( $lbl ne "" ) | ||
1406 : | && defined( $new = $new_name->{ $lbl } ) | ||
1407 : | ) { | ||
1408 : | set_newick_lbl( $node, $new ); | ||
1409 : | } | ||
1410 : | } | ||
1411 : | |||
1412 : | $node; | ||
1413 : | } | ||
1414 : | |||
1415 : | |||
1416 : | #------------------------------------------------------------------------------- | ||
1417 : | # Use a hash to relabel the tips in a newick tree (case insensitive). | ||
1418 : | # | ||
1419 : | # $newtree = newick_relabel_tips_i( $node, \%new_name ) | ||
1420 : | #------------------------------------------------------------------------------- | ||
1421 : | sub newick_relabel_tips_i { | ||
1422 : | my ( $node, $new_name ) = @_; | ||
1423 : | |||
1424 : | # Add any necessary lowercase keys to the hash: | ||
1425 : | |||
1426 : | my $lc_lbl; | ||
1427 : | foreach ( keys %$new_name ) { | ||
1428 : | $lc_lbl = lc $_; | ||
1429 : | ( $lc_lbl eq $_ ) or ( $new_name->{ $lc_lbl } = $new_name->{ $_ } ); | ||
1430 : | } | ||
1431 : | |||
1432 : | newick_relabel_tips_i2( $node, $new_name ); | ||
1433 : | } | ||
1434 : | |||
1435 : | |||
1436 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1437 : | # Do the actual relabeling | ||
1438 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1439 : | sub newick_relabel_tips_i2 { | ||
1440 : | my ( $node, $new_name ) = @_; | ||
1441 : | |||
1442 : | my @desc = newick_desc_list( $node ); | ||
1443 : | |||
1444 : | if ( @desc ) { | ||
1445 : | foreach ( @desc ) { newick_relabel_tips_i2( $_, $new_name ) } | ||
1446 : | } | ||
1447 : | else { | ||
1448 : | my ( $lbl, $new ); | ||
1449 : | if ( defined( $lbl = newick_lbl( $node ) ) | ||
1450 : | && ( $lbl ne "" ) | ||
1451 : | && defined( $new = $new_name->{ lc $lbl } ) | ||
1452 : | ) { | ||
1453 : | set_newick_lbl( $node, $new ); | ||
1454 : | } | ||
1455 : | } | ||
1456 : | |||
1457 : | $node; | ||
1458 : | } | ||
1459 : | |||
1460 : | |||
1461 : | #------------------------------------------------------------------------------- | ||
1462 : | # Set undefined branch lenghts (except root) to length x. | ||
1463 : | # | ||
1464 : | # $n_changed = newick_set_undefined_branches( $node, $x ) | ||
1465 : | #------------------------------------------------------------------------------- | ||
1466 : | sub newick_set_undefined_branches { | ||
1467 : | my ( $node, $x, $not_root ) = @_; | ||
1468 : | |||
1469 : | my $n = 0; | ||
1470 : | if ( $not_root && ! defined( newick_x( $node ) ) ) { | ||
1471 : | set_newick_x( $node, $x ); | ||
1472 : | $n++; | ||
1473 : | } | ||
1474 : | |||
1475 : | foreach ( newick_desc_list( $node ) ) { | ||
1476 : | $n += newick_set_undefined_branches( $_, $x, 1 ); | ||
1477 : | } | ||
1478 : | |||
1479 : | $n; | ||
1480 : | } | ||
1481 : | |||
1482 : | |||
1483 : | #------------------------------------------------------------------------------- | ||
1484 : | # Set all branch lenghts (except root) to length x. | ||
1485 : | # | ||
1486 : | # $n_changed = newick_set_all_branches( $node, $x ) | ||
1487 : | #------------------------------------------------------------------------------- | ||
1488 : | sub newick_set_all_branches { | ||
1489 : | my ( $node, $x, $not_root ) = @_; | ||
1490 : | |||
1491 : | my $n = 0; | ||
1492 : | overbeek | 1.7 | if ( $not_root ) |
1493 : | { | ||
1494 : | golsen | 1.1 | set_newick_x( $node, $x ); |
1495 : | $n++; | ||
1496 : | } | ||
1497 : | |||
1498 : | overbeek | 1.7 | foreach ( newick_desc_list( $node ) ) |
1499 : | { | ||
1500 : | golsen | 1.1 | $n += newick_set_all_branches( $_, $x, 1 ); |
1501 : | } | ||
1502 : | |||
1503 : | $n; | ||
1504 : | } | ||
1505 : | |||
1506 : | |||
1507 : | #------------------------------------------------------------------------------- | ||
1508 : | overbeek | 1.7 | # Rescale all branch lenghts by factor. |
1509 : | # | ||
1510 : | # $node = newick_rescale_branches( $node, $factor ) | ||
1511 : | #------------------------------------------------------------------------------- | ||
1512 : | sub newick_rescale_branches { | ||
1513 : | my ( $node, $factor ) = @_; | ||
1514 : | |||
1515 : | my $x = newick_x( $node ); | ||
1516 : | set_newick_x( $node, $factor * $x ) if $x; | ||
1517 : | |||
1518 : | foreach ( newick_desc_list( $node ) ) | ||
1519 : | { | ||
1520 : | newick_rescale_branches( $_, $factor ); | ||
1521 : | } | ||
1522 : | |||
1523 : | $node; | ||
1524 : | } | ||
1525 : | |||
1526 : | |||
1527 : | #------------------------------------------------------------------------------- | ||
1528 : | golsen | 1.5 | # Set negative branches to zero. The original tree is modfied. |
1529 : | # | ||
1530 : | # $n_changed = newick_fix_negative_branches( $tree ) | ||
1531 : | #------------------------------------------------------------------------------- | ||
1532 : | sub newick_fix_negative_branches { | ||
1533 : | my ( $tree ) = @_; | ||
1534 : | array_ref( $tree ) or return undef; | ||
1535 : | my $n_changed = 0; | ||
1536 : | my $x = newick_x( $tree ); | ||
1537 : | if ( defined( $x ) and $x < 0 ) | ||
1538 : | { | ||
1539 : | set_newick_x( $tree, 0 ); | ||
1540 : | $n_changed++; | ||
1541 : | } | ||
1542 : | |||
1543 : | foreach ( newick_desc_list( $tree ) ) | ||
1544 : | { | ||
1545 : | $n_changed += newick_fix_negative_branches( $_ ); | ||
1546 : | } | ||
1547 : | |||
1548 : | $n_changed; | ||
1549 : | } | ||
1550 : | |||
1551 : | |||
1552 : | #------------------------------------------------------------------------------- | ||
1553 : | golsen | 1.8 | # Remove comments from a newick tree (e.g., before writing for phylip). |
1554 : | # | ||
1555 : | # $node = newick_strip_comments( $node ) | ||
1556 : | #------------------------------------------------------------------------------- | ||
1557 : | sub newick_strip_comments { | ||
1558 : | my ( $node ) = @_; | ||
1559 : | |||
1560 : | @$node = @$node[ 0 .. 2 ]; | ||
1561 : | foreach ( newick_desc_list( $node ) ) { newick_strip_comments( $_ ) } | ||
1562 : | $node; | ||
1563 : | } | ||
1564 : | |||
1565 : | |||
1566 : | #------------------------------------------------------------------------------- | ||
1567 : | golsen | 1.1 | # Normalize tree order (in place). |
1568 : | # | ||
1569 : | # ( $tree, $label1 ) = normalize_newick_tree( $tree ) | ||
1570 : | #------------------------------------------------------------------------------- | ||
1571 : | sub normalize_newick_tree { | ||
1572 : | my ( $node ) = @_; | ||
1573 : | |||
1574 : | my @descends = newick_desc_list( $node ); | ||
1575 : | if ( @descends == 0 ) { return ( $node, lc newick_lbl( $node ) ) } | ||
1576 : | |||
1577 : | golsen | 1.9 | my %hash = map { ( normalize_newick_tree($_) )[1] => $_ } @descends; |
1578 : | golsen | 1.1 | my @keylist = sort { $a cmp $b } keys %hash; |
1579 : | set_newick_desc_list( $node, map { $hash{$_} } @keylist ); | ||
1580 : | |||
1581 : | ( $node, $keylist[0] ); | ||
1582 : | } | ||
1583 : | |||
1584 : | |||
1585 : | #------------------------------------------------------------------------------- | ||
1586 : | # Reverse tree order (in place). | ||
1587 : | # | ||
1588 : | # $tree = reverse_newick_tree( $tree ) | ||
1589 : | #------------------------------------------------------------------------------- | ||
1590 : | sub reverse_newick_tree { | ||
1591 : | my ( $node ) = @_; | ||
1592 : | |||
1593 : | my @descends = newick_desc_list( $node ); | ||
1594 : | if ( @descends ) { | ||
1595 : | set_newick_desc_list( $node, reverse @descends ); | ||
1596 : | foreach ( @descends ) { reverse_newick_tree( $_ ) } | ||
1597 : | } | ||
1598 : | $node; | ||
1599 : | } | ||
1600 : | |||
1601 : | |||
1602 : | #------------------------------------------------------------------------------- | ||
1603 : | # Standard unrooted tree (in place). | ||
1604 : | # | ||
1605 : | # $stdtree = std_unrooted_newick( $tree ) | ||
1606 : | #------------------------------------------------------------------------------- | ||
1607 : | sub std_unrooted_newick { | ||
1608 : | my ( $tree ) = @_; | ||
1609 : | |||
1610 : | my ( $mintip ) = sort { lc $a cmp lc $b } newick_tip_list( $tree ); | ||
1611 : | ( normalize_newick_tree( reroot_newick_next_to_tip( $tree, $mintip ) ) )[0]; | ||
1612 : | } | ||
1613 : | |||
1614 : | |||
1615 : | #------------------------------------------------------------------------------- | ||
1616 : | # Move largest groups to periphery of tree (in place). | ||
1617 : | # | ||
1618 : | # dir <= -2 for up-sweeping tree (big groups always first), | ||
1619 : | # = -1 for big group first, balanced tree, | ||
1620 : | # = 0 for balanced tree, | ||
1621 : | # = 1 for small group first, balanced tree, and | ||
1622 : | # >= 2 for down-sweeping tree (small groups always top) | ||
1623 : | # | ||
1624 : | # $tree = aesthetic_newick_tree( $treeref, $dir ) | ||
1625 : | #------------------------------------------------------------------------------- | ||
1626 : | sub aesthetic_newick_tree { | ||
1627 : | my ( $tree, $dir ) = @_; | ||
1628 : | my %cnt; | ||
1629 : | |||
1630 : | $dir = ! $dir ? 0 : # Undefined or zero | ||
1631 : | $dir <= -2 ? -1000000 : | ||
1632 : | $dir < 0 ? -1 : | ||
1633 : | $dir >= 2 ? 1000000 : | ||
1634 : | 1 ; | ||
1635 : | build_tip_count_hash( $tree, \%cnt ); | ||
1636 : | reorder_by_tip_count( $tree, \%cnt, $dir ); | ||
1637 : | } | ||
1638 : | |||
1639 : | |||
1640 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1641 : | # Build a hash to look up the number of descendants of each node. | ||
1642 : | # Access count with $cntref->{$noderef} | ||
1643 : | # | ||
1644 : | # $count = build_tip_count_hash( $node, $cnt_hash_ref ) | ||
1645 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1646 : | sub build_tip_count_hash { | ||
1647 : | my ( $node, $cntref ) = @_; | ||
1648 : | my ( $i, $cnt ); | ||
1649 : | |||
1650 : | if ( newick_n_desc( $node ) < 1 ) { $cnt = 1 } | ||
1651 : | else { | ||
1652 : | $cnt = 0; | ||
1653 : | foreach ( newick_desc_list( $node ) ) { | ||
1654 : | $cnt += build_tip_count_hash( $_, $cntref ); | ||
1655 : | } | ||
1656 : | } | ||
1657 : | |||
1658 : | $cntref->{$node} = $cnt; | ||
1659 : | $cnt; | ||
1660 : | } | ||
1661 : | |||
1662 : | |||
1663 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1664 : | # $node = reorder_by_tip_count( $node, $cntref, $dir ) | ||
1665 : | # dir < 0 for upward branch (big group first), | ||
1666 : | # = 0 for no change, and | ||
1667 : | # > 0 for downward branch (small group first). | ||
1668 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1669 : | sub reorder_by_tip_count { | ||
1670 : | my ( $node, $cntref, $dir ) = @_; | ||
1671 : | |||
1672 : | my $nd = newick_n_desc( $node ); | ||
1673 : | if ( $nd < 1 ) { return $node } # Do nothing to a tip | ||
1674 : | |||
1675 : | # Reorder this subtree: | ||
1676 : | |||
1677 : | my $dl_ref = newick_desc_ref( $node ); | ||
1678 : | if ( $dir < 0 ) { # Big group first | ||
1679 : | @$dl_ref = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref; | ||
1680 : | } | ||
1681 : | elsif ( $dir > 0 ) { # Small group first | ||
1682 : | @$dl_ref = sort { $cntref->{$a} <=> $cntref->{$b} } @$dl_ref; | ||
1683 : | } | ||
1684 : | |||
1685 : | # Reorder within descendant subtrees: | ||
1686 : | |||
1687 : | my $step = 0; | ||
1688 : | if ( abs( $dir ) < 1e5 ) { | ||
1689 : | $dir = 1 - $nd; # Midgroup => as is | ||
1690 : | # $dir = 1 - $nd + ( $dir < 0 ? -0.5 : 0.5 ); # Midgroup => outward | ||
1691 : | $step = 2; | ||
1692 : | } | ||
1693 : | |||
1694 : | for ( my $i = 0; $i < $nd; $i++ ) { | ||
1695 : | reorder_by_tip_count( $dl_ref->[$i], $cntref, $dir ); | ||
1696 : | $dir += $step; | ||
1697 : | } | ||
1698 : | |||
1699 : | $node; | ||
1700 : | } | ||
1701 : | |||
1702 : | |||
1703 : | #------------------------------------------------------------------------------- | ||
1704 : | # Move smallest groups to periphery of tree (in place). | ||
1705 : | # | ||
1706 : | # dir <= -2 for up-sweeping tree (big groups always first), | ||
1707 : | # = -1 for big group first, balanced tree, | ||
1708 : | # = 0 for balanced tree, | ||
1709 : | # = 1 for small group first, balanced tree, and | ||
1710 : | # >= 2 for down-sweeping tree (small groups always top) | ||
1711 : | # | ||
1712 : | # $tree = unaesthetic_newick_tree( $treeref, $dir ) | ||
1713 : | #------------------------------------------------------------------------------- | ||
1714 : | golsen | 1.9 | sub unaesthetic_newick_tree |
1715 : | { | ||
1716 : | golsen | 1.1 | my ( $tree, $dir ) = @_; |
1717 : | my %cnt; | ||
1718 : | |||
1719 : | $dir = ! $dir ? 0 : # Undefined or zero | ||
1720 : | $dir <= -2 ? -1000000 : | ||
1721 : | $dir < 0 ? -1 : | ||
1722 : | $dir >= 2 ? 1000000 : | ||
1723 : | 1 ; | ||
1724 : | build_tip_count_hash( $tree, \%cnt ); | ||
1725 : | reorder_against_tip_count( $tree, \%cnt, $dir ); | ||
1726 : | } | ||
1727 : | |||
1728 : | |||
1729 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1730 : | # $node = reorder_by_tip_count( $node, $cntref, $dir ) | ||
1731 : | # dir < 0 for upward branch (big group first), | ||
1732 : | # = 0 for no change, and | ||
1733 : | # > 0 for downward branch (small group first). | ||
1734 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1735 : | golsen | 1.9 | sub reorder_against_tip_count |
1736 : | { | ||
1737 : | golsen | 1.1 | my ( $node, $cntref, $dir ) = @_; |
1738 : | |||
1739 : | my $nd = newick_n_desc( $node ); | ||
1740 : | if ( $nd < 1 ) { return $node } # Do nothing to a tip | ||
1741 : | |||
1742 : | # Reorder this subtree: | ||
1743 : | |||
1744 : | my $dl_ref = newick_desc_ref( $node ); | ||
1745 : | if ( $dir > 0 ) { # Big group first | ||
1746 : | @$dl_ref = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref; | ||
1747 : | } | ||
1748 : | elsif ( $dir < 0 ) { # Small group first | ||
1749 : | @$dl_ref = sort { $cntref->{$a} <=> $cntref->{$b} } @$dl_ref; | ||
1750 : | } | ||
1751 : | |||
1752 : | # Reorder within descendant subtrees: | ||
1753 : | |||
1754 : | my $step = 0; | ||
1755 : | if (abs( $dir ) < 1e5) { | ||
1756 : | $dir = 1 - $nd; # Midgroup => as is | ||
1757 : | # $dir = 1 - $nd + ( $dir < 0 ? -0.5 : 0.5 ); # Midgroup => outward | ||
1758 : | $step = 2; | ||
1759 : | } | ||
1760 : | |||
1761 : | for ( my $i = 0; $i < $nd; $i++ ) { | ||
1762 : | reorder_by_tip_count( $dl_ref->[$i], $cntref, $dir ); | ||
1763 : | $dir += $step; | ||
1764 : | } | ||
1765 : | |||
1766 : | $node; | ||
1767 : | } | ||
1768 : | |||
1769 : | |||
1770 : | #------------------------------------------------------------------------------- | ||
1771 : | # Randomize descendant order at each node (in place). | ||
1772 : | # | ||
1773 : | # $tree = random_order_newick_tree( $tree ) | ||
1774 : | #------------------------------------------------------------------------------- | ||
1775 : | golsen | 1.9 | sub random_order_newick_tree |
1776 : | { | ||
1777 : | golsen | 1.1 | my ( $node ) = @_; |
1778 : | |||
1779 : | my $nd = newick_n_desc( $node ); | ||
1780 : | if ( $nd < 1 ) { return $node } # Do nothing to a tip | ||
1781 : | |||
1782 : | # Reorder this subtree: | ||
1783 : | |||
1784 : | my $dl_ref = newick_desc_ref( $node ); | ||
1785 : | golsen | 1.9 | @$dl_ref = &random_order( @$dl_ref ); |
1786 : | golsen | 1.1 | |
1787 : | # Reorder descendants: | ||
1788 : | |||
1789 : | foreach ( @$dl_ref ) { random_order_newick_tree( $_ ) } | ||
1790 : | |||
1791 : | $node; | ||
1792 : | } | ||
1793 : | |||
1794 : | |||
1795 : | #------------------------------------------------------------------------------- | ||
1796 : | # Reroot a tree to the node that lies at the end of a path. | ||
1797 : | # | ||
1798 : | # $newtree = reroot_newick_by_path( @path ) | ||
1799 : | #------------------------------------------------------------------------------- | ||
1800 : | golsen | 1.9 | sub reroot_newick_by_path |
1801 : | { | ||
1802 : | golsen | 1.1 | my ( $node1, $path1, @rest ) = @_; |
1803 : | array_ref( $node1 ) || return undef; # Always expect a node | ||
1804 : | |||
1805 : | defined( $path1 ) && @rest || return $node1; # If no path, we're done | ||
1806 : | |||
1807 : | my $node2 = $rest[0]; # Next element in path is node 2 | ||
1808 : | newick_desc_i( $node1, $path1 ) eq $node2 || return undef; # Check link | ||
1809 : | |||
1810 : | # Remove node 2 from node 1 descendant list. Could use a simple splice: | ||
1811 : | # | ||
1812 : | # splice( @$dl1, $path1-1, 1 ); | ||
1813 : | # | ||
1814 : | golsen | 1.8 | # But the following maintains the cyclic order of the nodes: |
1815 : | golsen | 1.1 | |
1816 : | my $dl1 = newick_desc_ref( $node1 ); | ||
1817 : | my $nd1 = @$dl1; | ||
1818 : | if ( $path1 == 1 ) { shift @$dl1 } | ||
1819 : | elsif ( $path1 == $nd1 ) { pop @$dl1 } | ||
1820 : | else { @$dl1 = ( @$dl1[ $path1 .. $nd1-1 ] | ||
1821 : | , @$dl1[ 0 .. $path1-2 ] | ||
1822 : | ) | ||
1823 : | golsen | 1.8 | } |
1824 : | golsen | 1.1 | |
1825 : | # Append node 1 to node 2 descendant list (does not alter numbering): | ||
1826 : | |||
1827 : | my $dl2 = newick_desc_ref( $node2 ); | ||
1828 : | if ( array_ref( $dl2 ) ) { push @$dl2, $node1 } | ||
1829 : | golsen | 1.8 | else { set_newick_desc_list( $node2, $node1 ) } |
1830 : | golsen | 1.1 | |
1831 : | # Move c1 comments from node 1 to node 2: | ||
1832 : | |||
1833 : | my $C11 = newick_c1( $node1 ); | ||
1834 : | my $C12 = newick_c1( $node2 ); | ||
1835 : | ! defined( $C11 ) || set_newick_c1( $node1, undef ); # Remove them from node 1 | ||
1836 : | golsen | 1.8 | if ( $C12 && @$C12 ) { # If node 2 comments and |
1837 : | if ( $C11 && @$C11 ) { unshift @$C12, @$C11 } # Node 1, prefix 1 to 2 | ||
1838 : | golsen | 1.1 | } |
1839 : | golsen | 1.8 | elsif ( $C11 && @$C11 ) { set_newick_c1( $node2, $C11 ) } # Otherwise move node 1 link |
1840 : | golsen | 1.1 | |
1841 : | # Swap branch lengths and comments for reversal of link direction: | ||
1842 : | |||
1843 : | my $x1 = newick_x( $node1 ); | ||
1844 : | my $x2 = newick_x( $node2 ); | ||
1845 : | ! defined( $x1 ) && ! defined ( $x2 ) || set_newick_x( $node1, $x2 ); | ||
1846 : | ! defined( $x1 ) && ! defined ( $x2 ) || set_newick_x( $node2, $x1 ); | ||
1847 : | |||
1848 : | my $c41 = newick_c4( $node1 ); | ||
1849 : | my $c42 = newick_c4( $node2 ); | ||
1850 : | ! defined( $c42 ) || ! @$c42 || set_newick_c4( $node1, $c42 ); | ||
1851 : | ! defined( $c41 ) || ! @$c41 || set_newick_c4( $node2, $c41 ); | ||
1852 : | |||
1853 : | my $c51 = newick_c5( $node1 ); | ||
1854 : | my $c52 = newick_c5( $node2 ); | ||
1855 : | ! defined( $c52 ) || ! @$c52 || set_newick_c5( $node1, $c52 ); | ||
1856 : | ! defined( $c51 ) || ! @$c51 || set_newick_c5( $node2, $c51 ); | ||
1857 : | |||
1858 : | reroot_newick_by_path( @rest ); # Node 2 is first element of rest | ||
1859 : | } | ||
1860 : | |||
1861 : | |||
1862 : | #------------------------------------------------------------------------------- | ||
1863 : | # Move root of tree to named tip. | ||
1864 : | # | ||
1865 : | # $newtree = reroot_newick_to_tip( $tree, $tip ) | ||
1866 : | #------------------------------------------------------------------------------- | ||
1867 : | sub reroot_newick_to_tip { | ||
1868 : | my ( $tree, $tipname ) = @_; | ||
1869 : | reroot_newick_by_path( path_to_tip( $tree, $tipname ) ); | ||
1870 : | } | ||
1871 : | |||
1872 : | |||
1873 : | #------------------------------------------------------------------------------- | ||
1874 : | # Move root of tree to be node adjacent to a named tip. | ||
1875 : | # | ||
1876 : | # $newtree = reroot_newick_next_to_tip( $tree, $tip ) | ||
1877 : | #------------------------------------------------------------------------------- | ||
1878 : | sub reroot_newick_next_to_tip { | ||
1879 : | my ( $tree, $tipname ) = @_; | ||
1880 : | my @path = path_to_tip( $tree, $tipname ); | ||
1881 : | @path || return undef; | ||
1882 : | @path == 1 ? reroot_newick_by_path( $tree, 1, newick_desc_i( $tree, 1 ) ) | ||
1883 : | : reroot_newick_by_path( @path[0 .. @path-3] ); | ||
1884 : | } | ||
1885 : | |||
1886 : | |||
1887 : | #------------------------------------------------------------------------------- | ||
1888 : | # Move root of tree to a node, defined by 1 or 3 tip names. | ||
1889 : | # | ||
1890 : | # $newtree = reroot_newick_to_node( $tree, @node ) | ||
1891 : | #------------------------------------------------------------------------------- | ||
1892 : | sub reroot_newick_to_node { | ||
1893 : | reroot_newick_by_path( path_to_node( @_ ) ); | ||
1894 : | } | ||
1895 : | |||
1896 : | |||
1897 : | #------------------------------------------------------------------------------- | ||
1898 : | # Move root of tree to a node, defined by reference. | ||
1899 : | # | ||
1900 : | # $newtree = reroot_newick_to_node_ref( $tree, $noderef ) | ||
1901 : | #------------------------------------------------------------------------------- | ||
1902 : | sub reroot_newick_to_node_ref { | ||
1903 : | my ( $tree, $node ) = @_; | ||
1904 : | reroot_newick_by_path( path_to_node_ref( $tree, $node ) ); | ||
1905 : | } | ||
1906 : | |||
1907 : | |||
1908 : | #------------------------------------------------------------------------------- | ||
1909 : | golsen | 1.9 | # Reroot a newick tree along the path between 2 nodes: |
1910 : | # | ||
1911 : | # $tree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction ) | ||
1912 : | #------------------------------------------------------------------------------- | ||
1913 : | sub reroot_newick_between_nodes | ||
1914 : | { | ||
1915 : | my ( $tree, $node1, $node2, $fraction ) = @_; | ||
1916 : | array_ref( $tree ) or return undef; | ||
1917 : | $fraction >= 0 && $fraction <= 1 or return undef; | ||
1918 : | |||
1919 : | # Find the paths to the nodes: | ||
1920 : | |||
1921 : | my @path1 = path_to_node( $tree, $node1 ) or return undef; | ||
1922 : | my @path2 = path_to_node( $tree, $node2 ) or return undef; | ||
1923 : | |||
1924 : | # Trim the common prefix, saving it: | ||
1925 : | |||
1926 : | my @prefix = (); | ||
1927 : | while ( $path1[1] == $path2[1] ) | ||
1928 : | { | ||
1929 : | push @prefix, splice( @path1, 0, 2 ); | ||
1930 : | splice( @path2, 0, 2 ); | ||
1931 : | } | ||
1932 : | |||
1933 : | my ( @path, $dist ); | ||
1934 : | if ( @path1 < 3 ) | ||
1935 : | { | ||
1936 : | @path2 >= 3 or return undef; # node1 = node2 | ||
1937 : | $dist = $fraction * newick_path_length( @path2 ); | ||
1938 : | @path = @path2; | ||
1939 : | } | ||
1940 : | elsif ( @path2 < 3 ) | ||
1941 : | { | ||
1942 : | $dist = ( 1 - $fraction ) * newick_path_length( @path1 ); | ||
1943 : | @path = @path1; | ||
1944 : | } | ||
1945 : | else | ||
1946 : | { | ||
1947 : | my $dist1 = newick_path_length( @path1 ); | ||
1948 : | my $dist2 = newick_path_length( @path2 ); | ||
1949 : | $dist = $fraction * ( $dist1 + $dist2 ) - $dist1; | ||
1950 : | @path = ( $dist <= 0 ) ? @path1 : @path2; | ||
1951 : | $dist = abs( $dist ); | ||
1952 : | } | ||
1953 : | |||
1954 : | # Descend tree until we reach the insertion branch: | ||
1955 : | |||
1956 : | my $x; | ||
1957 : | while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) ) | ||
1958 : | { | ||
1959 : | $dist -= $x; | ||
1960 : | push @prefix, splice( @path, 0, 2 ); | ||
1961 : | } | ||
1962 : | |||
1963 : | # Insert the new node: | ||
1964 : | |||
1965 : | my $newnode = [ [ $path[2] ], undef, $dist ]; | ||
1966 : | set_newick_desc_i( $path[0], $path[1], $newnode ); | ||
1967 : | set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) ); | ||
1968 : | |||
1969 : | # We can now build the path from root to the new node | ||
1970 : | |||
1971 : | reroot_newick_by_path( @prefix, @path[0,1], $newnode ); | ||
1972 : | } | ||
1973 : | |||
1974 : | |||
1975 : | #------------------------------------------------------------------------------- | ||
1976 : | golsen | 1.1 | # Move root of tree to an approximate midpoint. |
1977 : | # | ||
1978 : | # $newtree = reroot_newick_to_approx_midpoint( $tree ) | ||
1979 : | #------------------------------------------------------------------------------- | ||
1980 : | sub reroot_newick_to_approx_midpoint { | ||
1981 : | my ( $tree ) = @_; | ||
1982 : | golsen | 1.5 | |
1983 : | golsen | 1.1 | # Compile average tip to node distances assending |
1984 : | |||
1985 : | my $dists1 = average_to_tips_1( $tree ); | ||
1986 : | |||
1987 : | # Compile average tip to node distances descending, returning midpoint node | ||
1988 : | |||
1989 : | my $node = average_to_tips_2( $dists1, undef, undef ); | ||
1990 : | |||
1991 : | # Reroot | ||
1992 : | |||
1993 : | $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree | ||
1994 : | } | ||
1995 : | |||
1996 : | |||
1997 : | sub average_to_tips_1 { | ||
1998 : | my ( $node ) = @_; | ||
1999 : | |||
2000 : | my @desc_dists = map { average_to_tips_1( $_ ) } newick_desc_list( $node ); | ||
2001 : | my $x_below = 0; | ||
2002 : | if ( @desc_dists ) | ||
2003 : | { | ||
2004 : | foreach ( @desc_dists ) { $x_below += $_->[0] } | ||
2005 : | $x_below /= @desc_dists; | ||
2006 : | } | ||
2007 : | my $x = newick_x( $node ) || 0; | ||
2008 : | my $x_net = $x_below + $x; | ||
2009 : | |||
2010 : | [ $x_net, $x, $x_below, [ @desc_dists ], $node ] | ||
2011 : | } | ||
2012 : | |||
2013 : | golsen | 1.9 | |
2014 : | golsen | 1.1 | sub average_to_tips_2 { |
2015 : | my ( $dists1, $x_above, $anc_node ) = @_; | ||
2016 : | my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1; | ||
2017 : | |||
2018 : | # Are we done? Root is in this node's branch, or "above"? | ||
2019 : | |||
2020 : | # defined( $x_above ) and print STDERR "x_above = $x_above\n"; | ||
2021 : | # print STDERR "x = $x\n"; | ||
2022 : | # print STDERR "x_below = $x_below\n"; | ||
2023 : | # print STDERR "n_desc = ", scalar @$desc_list, "\n\n"; | ||
2024 : | |||
2025 : | if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) ) | ||
2026 : | { | ||
2027 : | # At this point the root can only be in this node's branch, | ||
2028 : | # or "above" it in the current rooting of the tree (which | ||
2029 : | # would mean that the midpoint is actually down a different | ||
2030 : | # path from the root of the current tree). | ||
2031 : | # | ||
2032 : | # Is the root in the current branch? | ||
2033 : | |||
2034 : | if ( ( $x_below + $x ) >= $x_above ) | ||
2035 : | { | ||
2036 : | return ( $x_above >= $x_below ) ? $anc_node : $node; | ||
2037 : | } | ||
2038 : | else | ||
2039 : | { | ||
2040 : | return undef; | ||
2041 : | } | ||
2042 : | } | ||
2043 : | |||
2044 : | golsen | 1.5 | # The root must be somewhere below this node: |
2045 : | golsen | 1.1 | |
2046 : | golsen | 1.5 | my $n_1 = @$desc_list - ( $anc_node ? 0 : 1 ); |
2047 : | golsen | 1.1 | my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 ); |
2048 : | |||
2049 : | foreach ( @$desc_list ) | ||
2050 : | { | ||
2051 : | # If input tree is tip_rooted, $n-1 can be 0, so: | ||
2052 : | |||
2053 : | my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0; | ||
2054 : | my $root = average_to_tips_2( $_, $above2, $node ); | ||
2055 : | if ( $root ) { return $root } | ||
2056 : | } | ||
2057 : | |||
2058 : | # Was not anywhere below this node (oh-oh): | ||
2059 : | |||
2060 : | return undef; | ||
2061 : | } | ||
2062 : | |||
2063 : | golsen | 1.9 | |
2064 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
2065 : | golsen | 1.5 | # Move root of tree to an approximate midpoint. Weight by tips. |
2066 : | # | ||
2067 : | # $newtree = reroot_newick_to_approx_midpoint_w( $tree ) | ||
2068 : | #------------------------------------------------------------------------------- | ||
2069 : | sub reroot_newick_to_approx_midpoint_w { | ||
2070 : | my ( $tree ) = @_; | ||
2071 : | |||
2072 : | # Compile average tip to node distances assending | ||
2073 : | |||
2074 : | my $dists1 = average_to_tips_1_w( $tree ); | ||
2075 : | |||
2076 : | # Compile average tip to node distances descending, returning midpoint node | ||
2077 : | |||
2078 : | my $node = average_to_tips_2_w( $dists1, undef, undef, undef ); | ||
2079 : | |||
2080 : | # Reroot | ||
2081 : | |||
2082 : | $node ? reroot_newick_to_node_ref( $tree, $node ) : $tree | ||
2083 : | } | ||
2084 : | |||
2085 : | |||
2086 : | sub average_to_tips_1_w { | ||
2087 : | my ( $node ) = @_; | ||
2088 : | |||
2089 : | my @desc_dists = map { average_to_tips_1_w( $_ ) } newick_desc_list( $node ); | ||
2090 : | my $x_below = 0; | ||
2091 : | my $n_below = 1; | ||
2092 : | if ( @desc_dists ) | ||
2093 : | { | ||
2094 : | $n_below = 0; | ||
2095 : | my $n; | ||
2096 : | foreach ( @desc_dists ) | ||
2097 : | { | ||
2098 : | $n_below += $n = $_->[1]; | ||
2099 : | $x_below += $n * $_->[0]; | ||
2100 : | } | ||
2101 : | $x_below /= $n_below; | ||
2102 : | } | ||
2103 : | my $x = newick_x( $node ) || 0; | ||
2104 : | my $x_net = $x_below + $x; | ||
2105 : | |||
2106 : | [ $x_net, $n_below, $x, $x_below, [ @desc_dists ], $node ] | ||
2107 : | } | ||
2108 : | |||
2109 : | |||
2110 : | sub average_to_tips_2_w { | ||
2111 : | my ( $dists1, $x_above, $n_above, $anc_node ) = @_; | ||
2112 : | my ( undef, $n_below, $x, $x_below, $desc_list, $node ) = @$dists1; | ||
2113 : | |||
2114 : | # Are we done? Root is in this node's branch, or "above"? | ||
2115 : | |||
2116 : | # defined( $x_above ) and print STDERR "x_above = $x_above\n"; | ||
2117 : | # print STDERR "x = $x\n"; | ||
2118 : | # print STDERR "x_below = $x_below\n"; | ||
2119 : | # print STDERR "n_desc = ", scalar @$desc_list, "\n\n"; | ||
2120 : | |||
2121 : | if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) ) | ||
2122 : | { | ||
2123 : | # At this point the root can only be in this node's branch, | ||
2124 : | # or "above" it in the current rooting of the tree (which | ||
2125 : | # would mean that the midpoint is actually down a different | ||
2126 : | # path from the root of the current tree). | ||
2127 : | # | ||
2128 : | # Is the root in the current branch? | ||
2129 : | |||
2130 : | if ( ( $x_below + $x ) >= $x_above ) | ||
2131 : | { | ||
2132 : | return ( $x_above >= $x_below ) ? $anc_node : $node; | ||
2133 : | } | ||
2134 : | else | ||
2135 : | { | ||
2136 : | return undef; | ||
2137 : | } | ||
2138 : | } | ||
2139 : | |||
2140 : | # The root must be some where below this node: | ||
2141 : | |||
2142 : | $n_above ||= 0; | ||
2143 : | my $n = $n_above + $n_below; | ||
2144 : | my $ttl_w_dist = ( $n_below * $x_below ) | ||
2145 : | + ( defined( $x_above ) ? $n_above * ( $x_above + $x ) : 0 ); | ||
2146 : | |||
2147 : | foreach ( @$desc_list ) | ||
2148 : | { | ||
2149 : | my $n_2 = $_->[1]; # n in subtree | ||
2150 : | my $n_above2 = $n - $n_2; # tip rooted has 1 above | ||
2151 : | |||
2152 : | # If input tree is tip_rooted, $n_above2 can be 0, so: | ||
2153 : | |||
2154 : | my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 ) | ||
2155 : | : 0; | ||
2156 : | my $root = average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node ); | ||
2157 : | if ( $root ) { return $root } | ||
2158 : | } | ||
2159 : | |||
2160 : | # Was not anywhere below this node (oh-oh): | ||
2161 : | |||
2162 : | return undef; | ||
2163 : | } | ||
2164 : | |||
2165 : | golsen | 1.9 | |
2166 : | golsen | 1.5 | #------------------------------------------------------------------------------- |
2167 : | golsen | 1.1 | # Move root of tree from tip to adjacent node. |
2168 : | # | ||
2169 : | # $newtree = uproot_tip_rooted_newick( $tree ) | ||
2170 : | #------------------------------------------------------------------------------- | ||
2171 : | sub uproot_tip_rooted_newick { | ||
2172 : | my ( $node ) = @_; | ||
2173 : | newick_is_tip_rooted( $node ) || return $node; | ||
2174 : | |||
2175 : | # Path to the sole descendant: | ||
2176 : | |||
2177 : | reroot_newick_by_path( $node, 1, newick_desc_i( $node, 1 ) ); | ||
2178 : | } | ||
2179 : | |||
2180 : | |||
2181 : | #------------------------------------------------------------------------------- | ||
2182 : | # Remove root bifurcation. | ||
2183 : | # | ||
2184 : | # Root node label, label comment and descendant list comment are discarded. | ||
2185 : | # | ||
2186 : | # $newtree = uproot_newick( $tree ) | ||
2187 : | #------------------------------------------------------------------------------- | ||
2188 : | sub uproot_newick { | ||
2189 : | my ( $node0 ) = @_; | ||
2190 : | newick_is_rooted( $node0 ) || return $node0; | ||
2191 : | |||
2192 : | my ( $node1, $node2 ) = newick_desc_list( $node0 ); | ||
2193 : | |||
2194 : | # Ensure that node1 has at least 1 descendant | ||
2195 : | |||
2196 : | if ( newick_n_desc( $node1 ) ) { | ||
2197 : | push @{ newick_desc_ref( $node1 ) }, $node2; # Add node2 to descend list | ||
2198 : | } | ||
2199 : | |||
2200 : | # Or node2 has at least 1 descendant | ||
2201 : | |||
2202 : | elsif ( newick_n_desc( $node2 ) ) { | ||
2203 : | unshift @{ newick_desc_ref( $node2 ) }, $node1; # Add node1 to descend list | ||
2204 : | ( $node1, $node2 ) = ( $node2, $node1 ); # And reverse labels | ||
2205 : | } | ||
2206 : | |||
2207 : | # We could make this into a tip rooted tree, but for now: | ||
2208 : | |||
2209 : | else { return $node0 } | ||
2210 : | |||
2211 : | # Prefix node1 branch to that of node2: | ||
2212 : | |||
2213 : | add_to_newick_branch( $node2, $node1 ); | ||
2214 : | set_newick_x( $node1, undef ); | ||
2215 : | |||
2216 : | # Tree prefix comment lists (as references): | ||
2217 : | |||
2218 : | my $C10 = newick_c1( $node0 ); | ||
2219 : | my $C11 = newick_c1( $node1 ); | ||
2220 : | golsen | 1.9 | if ( $C11 && @$C11 ) { |
2221 : | golsen | 1.1 | if ( $C10 && @$C10 ) { unshift @$C11, @$C10 } # Prefix to node1 comments |
2222 : | } | ||
2223 : | elsif ( $C10 && @$C10 ) { | ||
2224 : | set_newick_c1( $node1, $C10 ) # Or move node0 comments to node1 | ||
2225 : | } | ||
2226 : | |||
2227 : | $node1; | ||
2228 : | } | ||
2229 : | |||
2230 : | |||
2231 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2232 : | # Prefix branch of node2 to that of node1: | ||
2233 : | # | ||
2234 : | # $node1 = add_to_newick_branch( $node1, $node2 ) | ||
2235 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2236 : | sub add_to_newick_branch { | ||
2237 : | my ( $node1, $node2 ) = @_; | ||
2238 : | array_ref( $node1 ) || die "add_to_newick_branch: arg 1 not array ref\n"; | ||
2239 : | array_ref( $node2 ) || die "add_to_newick_branch: arg 2 not array ref\n"; | ||
2240 : | |||
2241 : | # Node structure template: | ||
2242 : | # my ( $DL, $L, $X, $C1, $C2, $C3, $C4, $C5 ) = @$node; | ||
2243 : | |||
2244 : | # Fix branch lengths for joining of two branches: | ||
2245 : | |||
2246 : | set_newick_x( $node1, newick_x( $node1 ) + newick_x( $node2 ) ); | ||
2247 : | |||
2248 : | # Merge branch length comments: | ||
2249 : | |||
2250 : | my $C41 = newick_c4( $node1 ); # Ref to node1 C4 | ||
2251 : | my $C42 = newick_c4( $node2 ); # Ref to node2 C4 | ||
2252 : | if ( $C41 && @$C41 ) { | ||
2253 : | if ( $C42 && @$C42 ) { unshift @$C41, @$C42 } # Add node2 comment | ||
2254 : | } | ||
2255 : | elsif ( $C42 && @$C42 ) { set_newick_c4( $node1, $C42 ) } # Or move node1 comment | ||
2256 : | |||
2257 : | my $C51 = newick_c5( $node1 ); # Ref to node1 C5 | ||
2258 : | my $C52 = newick_c5( $node2 ); # Ref to node2 C5 | ||
2259 : | if ( $C51 && @$C51 ) { | ||
2260 : | if ( $C52 && @$C52 ) { unshift @$C51, @$C52 } # Add node2 comment | ||
2261 : | } | ||
2262 : | elsif ( $C52 && @$C52 ) { set_newick_c5( $node1, $C52 ) } # Or move node1 comment | ||
2263 : | |||
2264 : | $node1; | ||
2265 : | } | ||
2266 : | |||
2267 : | |||
2268 : | #------------------------------------------------------------------------------- | ||
2269 : | golsen | 1.5 | # Collapse zero-length branches to make multifurcation. The original tree |
2270 : | # is modified. | ||
2271 : | # | ||
2272 : | # $tree = collapse_zero_length_branches( $tree ) | ||
2273 : | # $tree = collapse_zero_length_branches( $tree, $not_root ) | ||
2274 : | #------------------------------------------------------------------------------- | ||
2275 : | sub collapse_zero_length_branches { | ||
2276 : | my ( $tree, $not_root ) = @_; | ||
2277 : | array_ref( $tree ) || return undef; | ||
2278 : | |||
2279 : | my @desc = newick_desc_list( $tree ); | ||
2280 : | @desc or return ( $tree ); # Cannot collapse terminal branch | ||
2281 : | |||
2282 : | # Analyze descendants: | ||
2283 : | |||
2284 : | $not_root ||= 0; | ||
2285 : | my @new_desc = (); | ||
2286 : | my $changed = 0; | ||
2287 : | foreach ( @desc ) | ||
2288 : | { | ||
2289 : | my ( undef, @to_add ) = collapse_zero_length_branches( $_, $not_root+1 ); | ||
2290 : | if ( @to_add ) | ||
2291 : | { | ||
2292 : | push @new_desc, @to_add; | ||
2293 : | $changed = 1; | ||
2294 : | } | ||
2295 : | else | ||
2296 : | { | ||
2297 : | push @new_desc, $_; | ||
2298 : | } | ||
2299 : | } | ||
2300 : | set_newick_desc_ref( $tree, [ @new_desc ] ) if $changed; | ||
2301 : | |||
2302 : | # Collapse if not root, not tip and zero (or negative) branch: | ||
2303 : | |||
2304 : | my $collapse = $not_root && @new_desc && ( newick_x( $tree ) <= 0 ) ? 1 : 0; | ||
2305 : | ( $tree, ( $collapse ? @new_desc : () ) ); | ||
2306 : | } | ||
2307 : | |||
2308 : | golsen | 1.8 | #------------------------------------------------------------------------------- |
2309 : | # Add a subtree to a newick tree node: | ||
2310 : | # | ||
2311 : | # $node = newick_insert_at_node( $node, $subtree ) | ||
2312 : | #------------------------------------------------------------------------------- | ||
2313 : | sub newick_insert_at_node | ||
2314 : | { | ||
2315 : | my ( $node, $subtree ) = @_; | ||
2316 : | array_ref( $node ) && array_ref( $subtree ) or return undef; | ||
2317 : | |||
2318 : | # We could check validity of trees, but .... | ||
2319 : | |||
2320 : | my $dl = newick_desc_ref( $node ); | ||
2321 : | if ( array_ref( $dl ) ) | ||
2322 : | { | ||
2323 : | push @$dl, $subtree; | ||
2324 : | } | ||
2325 : | else | ||
2326 : | { | ||
2327 : | set_newick_desc_ref( $node, [ $subtree ] ); | ||
2328 : | } | ||
2329 : | return $node; | ||
2330 : | } | ||
2331 : | |||
2332 : | |||
2333 : | #------------------------------------------------------------------------------- | ||
2334 : | # Insert a subtree into a newick tree along the path between 2 nodes: | ||
2335 : | # | ||
2336 : | # $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction ) | ||
2337 : | #------------------------------------------------------------------------------- | ||
2338 : | sub newick_insert_between_nodes | ||
2339 : | { | ||
2340 : | my ( $tree, $subtree, $node1, $node2, $fraction ) = @_; | ||
2341 : | array_ref( $tree ) && array_ref( $subtree ) or return undef; | ||
2342 : | $fraction >= 0 && $fraction <= 1 or return undef; | ||
2343 : | |||
2344 : | # Find the paths to the nodes: | ||
2345 : | |||
2346 : | my @path1 = path_to_node( $tree, $node1 ) or return undef; | ||
2347 : | my @path2 = path_to_node( $tree, $node2 ) or return undef; | ||
2348 : | |||
2349 : | # Trim the common prefix: | ||
2350 : | |||
2351 : | while ( $path1[1] == $path2[1] ) | ||
2352 : | { | ||
2353 : | splice( @path1, 0, 2 ); | ||
2354 : | splice( @path2, 0, 2 ); | ||
2355 : | } | ||
2356 : | |||
2357 : | my ( @path, $dist ); | ||
2358 : | if ( @path1 < 3 ) | ||
2359 : | { | ||
2360 : | @path2 >= 3 or return undef; # node1 = node2 | ||
2361 : | golsen | 1.9 | $dist = $fraction * newick_path_length( @path2 ); |
2362 : | golsen | 1.8 | @path = @path2; |
2363 : | } | ||
2364 : | elsif ( @path2 < 3 ) | ||
2365 : | { | ||
2366 : | $dist = ( 1 - $fraction ) * newick_path_length( @path1 ); | ||
2367 : | @path = @path1; | ||
2368 : | } | ||
2369 : | else | ||
2370 : | { | ||
2371 : | my $dist1 = newick_path_length( @path1 ); | ||
2372 : | my $dist2 = newick_path_length( @path2 ); | ||
2373 : | $dist = $fraction * ( $dist1 + $dist2 ) - $dist1; | ||
2374 : | @path = ( $dist <= 0 ) ? @path1 : @path2; | ||
2375 : | $dist = abs( $dist ); | ||
2376 : | } | ||
2377 : | |||
2378 : | # Descend tree until we reach the insertion branch: | ||
2379 : | |||
2380 : | my $x; | ||
2381 : | while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) ) | ||
2382 : | { | ||
2383 : | $dist -= $x; | ||
2384 : | splice( @path, 0, 2 ); | ||
2385 : | } | ||
2386 : | |||
2387 : | # Insert the new node: | ||
2388 : | |||
2389 : | set_newick_desc_i( $path[0], $path[1], [ [ $path[2], $subtree ], undef, $dist ] ); | ||
2390 : | set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) ); | ||
2391 : | |||
2392 : | return $tree; | ||
2393 : | } | ||
2394 : | |||
2395 : | golsen | 1.5 | |
2396 : | #------------------------------------------------------------------------------- | ||
2397 : | golsen | 1.1 | # Prune one or more tips from a tree: |
2398 : | # Caveat: if one tip is listed, the original tree is modified. | ||
2399 : | golsen | 1.5 | # if more than one tip is listed, a copy of the tree is returned |
2400 : | golsen | 1.1 | # (even if it is just listing the same tip twice!). |
2401 : | # | ||
2402 : | # $newtree = prune_from_newick( $tree, $tip ) | ||
2403 : | # $newtree = prune_from_newick( $tree, @tips ) | ||
2404 : | # $newtree = prune_from_newick( $tree, \@tips ) | ||
2405 : | #------------------------------------------------------------------------------- | ||
2406 : | sub prune_from_newick { | ||
2407 : | my ( $tr, @tips ) = @_; | ||
2408 : | if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } } | ||
2409 : | golsen | 1.9 | |
2410 : | golsen | 1.1 | if ( @tips == 0 ) { return $tr } |
2411 : | if ( @tips == 1 ) { return prune_1_from_newick( $tr, @tips ) } | ||
2412 : | |||
2413 : | my %del = map { ( $_, 1 ) } @tips; | ||
2414 : | my @keep = grep { ! $del{ $_ } } newick_tip_list( $tr ); | ||
2415 : | newick_subtree( $tr, @keep ); | ||
2416 : | } | ||
2417 : | |||
2418 : | |||
2419 : | #------------------------------------------------------------------------------- | ||
2420 : | # Prune a tip from a tree: | ||
2421 : | # | ||
2422 : | # $newtree = prune_1_from_newick( $tree, $tip ) | ||
2423 : | #------------------------------------------------------------------------------- | ||
2424 : | sub prune_1_from_newick { | ||
2425 : | my ( $tr, $tip ) = @_; | ||
2426 : | my @path = path_to_tip( $tr, $tip ); | ||
2427 : | if ( @path < 3 ) { return $tr } | ||
2428 : | |||
2429 : | my $node = $path[-1]; # Node with the tip | ||
2430 : | my $i1 = $path[-2]; # Descendant number of node in ancestor desc list | ||
2431 : | my $anc1 = $path[-3]; # Ancestor of node | ||
2432 : | my $nd1 = newick_n_desc( $anc1 ); # Number of descendants of ancestor | ||
2433 : | my $anc2 = ( @path >= 5 ) ? $path[-5] : undef; # Ancestor of anc1 | ||
2434 : | |||
2435 : | # dump_tree( $node ); | ||
2436 : | # print STDERR "i1 = $i1\n"; | ||
2437 : | # dump_tree( $anc1 ); | ||
2438 : | # print STDERR "nd1 = $nd1\n"; | ||
2439 : | # defined( $anc2 ) && dump_tree( $anc2 ); | ||
2440 : | |||
2441 : | if ( $nd1 > 3 || ( $anc2 && $nd1 > 2 ) ) { # Tip joins at multifurcation | ||
2442 : | splice( @{ $anc1->[0] }, $i1-1, 1 ); # delete the descendant | ||
2443 : | } | ||
2444 : | |||
2445 : | elsif ( $anc2 ) { # Tip joins at internal bifurcation | ||
2446 : | my $sis = newick_desc_i( $anc1, 3-$i1 ); # find sister node | ||
2447 : | add_to_newick_branch( $sis, $anc1 ); # combine internal branches | ||
2448 : | set_newick_desc_i( $anc2, $path[-4], $sis ); # remove $anc1 | ||
2449 : | } | ||
2450 : | |||
2451 : | elsif ( $nd1 == 2) { # Tip joins bifurcating root node | ||
2452 : | my $sis = newick_desc_i( $anc1, 3-$i1 ); # find sister node | ||
2453 : | $sis->[1] = $anc1->[1] if ! $sis->[1] && $anc1->[1]; # root label | ||
2454 : | $sis->[2] = undef; # root branch len | ||
2455 : | $sis->[3] = $anc1->[3] if ! $sis->[3] && $anc1->[3]; # tree comment | ||
2456 : | $sis->[4] = $anc1->[4] if ! $sis->[4] && $anc1->[4]; # desc list comment | ||
2457 : | golsen | 1.9 | $sis->[5] = $anc1->[5] if ! $sis->[5] && $anc1->[5]; # label comment |
2458 : | golsen | 1.1 | $sis->[6] = undef if $sis->[6]; # root branch comment |
2459 : | $sis->[7] = undef if $sis->[7]; # root branch comment | ||
2460 : | $tr = $sis; # sister is new root | ||
2461 : | } | ||
2462 : | |||
2463 : | elsif ( $nd1 == 3 ) { # Tip joins trifurcating root: | ||
2464 : | splice( @{ $anc1->[0] }, $i1-1, 1 ); # delete the descendant, and | ||
2465 : | $tr = uproot_newick( $tr ); # fix the rooting | ||
2466 : | } | ||
2467 : | |||
2468 : | else { | ||
2469 : | return undef; | ||
2470 : | } | ||
2471 : | |||
2472 : | return $tr; | ||
2473 : | } | ||
2474 : | |||
2475 : | |||
2476 : | #------------------------------------------------------------------------------- | ||
2477 : | # Produce a subtree with the desired tips: | ||
2478 : | # | ||
2479 : | # Except for (some) tip nodes, the tree produced is a copy. | ||
2480 : | # There is no check that requested tips exist. | ||
2481 : | # | ||
2482 : | # $newtree = newick_subtree( $tree, @tips ) | ||
2483 : | # $newtree = newick_subtree( $tree, \@tips ) | ||
2484 : | #------------------------------------------------------------------------------- | ||
2485 : | sub newick_subtree { | ||
2486 : | my ( $tr, @tips ) = @_; | ||
2487 : | if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } } | ||
2488 : | |||
2489 : | if ( @tips < 2 ) { return undef } | ||
2490 : | my $was_rooted = newick_is_rooted( $tr ); | ||
2491 : | my $keephash = { map { ( $_, 1 ) } @tips }; | ||
2492 : | my $tr2 = subtree1( $tr, $keephash ); | ||
2493 : | $tr2 = uproot_newick( $tr2 ) if ! $was_rooted && newick_is_rooted( $tr2 ); | ||
2494 : | $tr2->[2] = undef if $tr2; # undef root branch length | ||
2495 : | $tr2; | ||
2496 : | } | ||
2497 : | |||
2498 : | |||
2499 : | sub subtree1 { | ||
2500 : | my ( $tr, $keep ) = @_; | ||
2501 : | my @desc1 = newick_desc_list( $tr ); | ||
2502 : | |||
2503 : | # Is this a tip, and is it in the keep list? | ||
2504 : | |||
2505 : | if ( @desc1 < 1 ) { | ||
2506 : | return ( $keep->{ newick_lbl( $tr ) } ) ? $tr : undef; | ||
2507 : | } | ||
2508 : | |||
2509 : | # Internal node: analyze the descendants: | ||
2510 : | |||
2511 : | my @desc2 = (); | ||
2512 : | foreach ( @desc1 ) { | ||
2513 : | my $desc = subtree1( $_, $keep ); | ||
2514 : | if ( $desc && @$desc ) { push @desc2, $desc } | ||
2515 : | } | ||
2516 : | |||
2517 : | if ( @desc2 == 0 ) { return undef } | ||
2518 : | if ( @desc2 > 1 ) { return [ \@desc2, @$tr[ 1 .. @$tr - 1 ] ] } | ||
2519 : | |||
2520 : | # Exactly 1 descendant | ||
2521 : | |||
2522 : | my $desc = $desc2[ 0 ]; | ||
2523 : | my @nn = ( $desc->[0], | ||
2524 : | $desc->[1] ? $desc->[1] : $tr->[1], | ||
2525 : | defined( $tr->[2] ) ? $desc->[2] + $tr->[2] : undef | ||
2526 : | ); | ||
2527 : | |||
2528 : | # Merge comments (only recreating the ones that existed): | ||
2529 : | |||
2530 : | if ( $tr->[3] && @{$tr->[3]} || $desc->[3] && @{$desc->[3]} ) { | ||
2531 : | $nn[3] = [ $tr->[3] ? @{$tr->[3]} : (), $desc->[3] ? @{$desc->[3]} : () ]; | ||
2532 : | } | ||
2533 : | if ( $tr->[4] && @{$tr->[4]} || $desc->[4] && @{$desc->[4]} ) { | ||
2534 : | $nn[4] = [ $tr->[4] ? @{$tr->[4]} : (), $desc->[4] ? @{$desc->[4]} : () ]; | ||
2535 : | } | ||
2536 : | if ( $tr->[5] && @{$tr->[5]} || $desc->[5] && @{$desc->[5]} ) { | ||
2537 : | $nn[5] = [ $tr->[5] ? @{$tr->[5]} : (), $desc->[5] ? @{$desc->[5]} : () ]; | ||
2538 : | } | ||
2539 : | if ( $tr->[6] && @{$tr->[6]} || $desc->[6] && @{$desc->[6]} ) { | ||
2540 : | $nn[6] = [ $tr->[6] ? @{$tr->[6]} : (), $desc->[6] ? @{$desc->[6]} : () ]; | ||
2541 : | } | ||
2542 : | if ( $tr->[7] && @{$tr->[7]} || $desc->[7] && @{$desc->[7]} ) { | ||
2543 : | $nn[7] = [ $tr->[7] ? @{$tr->[7]} : (), $desc->[7] ? @{$desc->[7]} : () ]; | ||
2544 : | } | ||
2545 : | |||
2546 : | return \@nn; | ||
2547 : | } | ||
2548 : | |||
2549 : | |||
2550 : | #=============================================================================== | ||
2551 : | # | ||
2552 : | golsen | 1.9 | # Representative subtrees |
2553 : | # | ||
2554 : | #=============================================================================== | ||
2555 : | # Find subtree of size n representating vicinity of the root: | ||
2556 : | # | ||
2557 : | # $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority ) | ||
2558 : | # $subtree = root_neighborhood_representative_tree( $tree, $n ) | ||
2559 : | # | ||
2560 : | # Note that if $tree is rooted, then the subtree will also be. This can have | ||
2561 : | # consequences on downstream programs. | ||
2562 : | #------------------------------------------------------------------------------- | ||
2563 : | sub root_neighborhood_representative_tree | ||
2564 : | { | ||
2565 : | my ( $tree, $n, $tip_priority ) = @_; | ||
2566 : | array_ref( $tree ) && ( $n >= 2 ) or return undef; | ||
2567 : | if ( newick_tip_count( $tree ) <= $n ) { return $tree } | ||
2568 : | |||
2569 : | $tip_priority ||= default_tip_priority( $tree ); | ||
2570 : | my @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) } | ||
2571 : | root_proximal_newick_subtrees( $tree, $n ); | ||
2572 : | |||
2573 : | newick_subtree( copy_newick_tree( $tree ), \@tips ); | ||
2574 : | } | ||
2575 : | |||
2576 : | |||
2577 : | #------------------------------------------------------------------------------- | ||
2578 : | # Find n tips to represent tree lineages in vicinity of another tip. | ||
2579 : | # Default tip priority is short total branch length. | ||
2580 : | # | ||
2581 : | # \@tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority ) | ||
2582 : | # @tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority ) | ||
2583 : | # \@tips = root_neighborhood_representative_tips( $tree, $n ) | ||
2584 : | # @tips = root_neighborhood_representative_tips( $tree, $n ) | ||
2585 : | #------------------------------------------------------------------------------- | ||
2586 : | sub root_neighborhood_representative_tips | ||
2587 : | { | ||
2588 : | my ( $tree, $n, $tip_priority ) = @_; | ||
2589 : | array_ref( $tree ) && ( $n >= 2 ) or return undef; | ||
2590 : | |||
2591 : | my @tips; | ||
2592 : | if ( newick_tip_count( $tree ) <= $n ) | ||
2593 : | { | ||
2594 : | @tips = newick_tip_list( $tree ); | ||
2595 : | } | ||
2596 : | else | ||
2597 : | { | ||
2598 : | $tip_priority ||= default_tip_priority( $tree ); | ||
2599 : | @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) } | ||
2600 : | root_proximal_newick_subtrees( $tree, $n ); | ||
2601 : | } | ||
2602 : | |||
2603 : | wantarray ? @tips : \@tips; | ||
2604 : | } | ||
2605 : | |||
2606 : | |||
2607 : | #------------------------------------------------------------------------------- | ||
2608 : | # Find subtree of size n representating vicinity of a tip: | ||
2609 : | # | ||
2610 : | # $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority ) | ||
2611 : | # $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n ) | ||
2612 : | #------------------------------------------------------------------------------- | ||
2613 : | sub tip_neighborhood_representative_tree | ||
2614 : | { | ||
2615 : | my ( $tree, $tip, $n, $tip_priority ) = @_; | ||
2616 : | array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef; | ||
2617 : | newick_tip_in_tree( $tree, $tip ) or return undef; | ||
2618 : | |||
2619 : | my $tree1 = copy_newick_tree( $tree ); | ||
2620 : | if ( newick_tip_count( $tree1 ) - 1 <= $n ) | ||
2621 : | { | ||
2622 : | return prune_from_newick( $tree1, $tip ) | ||
2623 : | } | ||
2624 : | |||
2625 : | $tree1 = reroot_newick_to_tip( $tree1, $tip ); | ||
2626 : | $tree1 = newick_desc_i( $tree1, 1 ); # Node immediately below tip | ||
2627 : | my @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority ); | ||
2628 : | newick_subtree( copy_newick_tree( $tree ), \@tips ); | ||
2629 : | } | ||
2630 : | |||
2631 : | |||
2632 : | #------------------------------------------------------------------------------- | ||
2633 : | # Find n tips to represent tree lineages in vicinity of another tip. | ||
2634 : | # Default tip priority is short total branch length. | ||
2635 : | # | ||
2636 : | # \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority ) | ||
2637 : | # @tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority ) | ||
2638 : | # \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n ) | ||
2639 : | # @tips = tip_neighborhood_representative_tips( $tree, $tip, $n ) | ||
2640 : | #------------------------------------------------------------------------------- | ||
2641 : | sub tip_neighborhood_representative_tips | ||
2642 : | { | ||
2643 : | my ( $tree, $tip, $n, $tip_priority ) = @_; | ||
2644 : | array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef; | ||
2645 : | newick_tip_in_tree( $tree, $tip ) or return undef; | ||
2646 : | |||
2647 : | my @tips = newick_tip_list( $tree ); | ||
2648 : | if ( newick_tip_count( $tree ) - 1 <= $n ) | ||
2649 : | { | ||
2650 : | @tips = grep { $_ ne $tip } @tips; | ||
2651 : | } | ||
2652 : | else | ||
2653 : | { | ||
2654 : | my $tree1 = copy_newick_tree( $tree ); | ||
2655 : | $tree1 = reroot_newick_to_tip( $tree1, $tip ); | ||
2656 : | $tree1 = newick_desc_i( $tree1, 1 ); # Node immediately below tip | ||
2657 : | @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority ); | ||
2658 : | } | ||
2659 : | |||
2660 : | wantarray ? @tips : \@tips; | ||
2661 : | } | ||
2662 : | |||
2663 : | |||
2664 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2665 : | # Anonymous hash of the negative distance from root to each tip: | ||
2666 : | # | ||
2667 : | # \%tip_priority = default_tip_priority( $tree ) | ||
2668 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2669 : | sub default_tip_priority | ||
2670 : | { | ||
2671 : | my ( $tree ) = @_; | ||
2672 : | my $tip_distances = newick_tip_distances( $tree ) || {}; | ||
2673 : | return { map { $_ => -$tip_distances->{$_} } keys %$tip_distances }; | ||
2674 : | } | ||
2675 : | |||
2676 : | |||
2677 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2678 : | # Select a tip from a subtree base on a priority value: | ||
2679 : | # | ||
2680 : | # $tip = representative_tip_of_newick_node( $node, \%tip_priority ) | ||
2681 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2682 : | sub representative_tip_of_newick_node | ||
2683 : | { | ||
2684 : | my ( $node, $tip_priority ) = @_; | ||
2685 : | my ( $tip ) = sort { $b->[1] <=> $a->[1] } # The best | ||
2686 : | map { [ $_, $tip_priority->{ $_ } ] } | ||
2687 : | newick_tip_list( $node ); | ||
2688 : | $tip->[0]; # Label from label-priority pair | ||
2689 : | } | ||
2690 : | |||
2691 : | |||
2692 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2693 : | # Find n subtrees focused around the root of a tree. Typically each will | ||
2694 : | # then be reduced to a single tip to make a representative tree: | ||
2695 : | # | ||
2696 : | # @subtrees = root_proximal_newick_subtrees( $tree, $n ) | ||
2697 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2698 : | sub root_proximal_newick_subtrees | ||
2699 : | { | ||
2700 : | my ( $tree, $n ) = @_; | ||
2701 : | my $node_start_end = newick_branch_intervals( $tree ); | ||
2702 : | n_representative_branches( $n, $node_start_end ); | ||
2703 : | } | ||
2704 : | |||
2705 : | |||
2706 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2707 : | # @node_start_end = newick_branch_intervals( $tree ) | ||
2708 : | # \@node_start_end = newick_branch_intervals( $tree ) | ||
2709 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2710 : | sub newick_branch_intervals | ||
2711 : | { | ||
2712 : | my ( $node, $parent_x ) = @_; | ||
2713 : | $parent_x ||= 0; | ||
2714 : | my ( $desc, undef, $dx ) = @$node; | ||
2715 : | my $x = $parent_x + $dx; | ||
2716 : | my $interval = [ $node, $parent_x, $desc && @$desc ? $x : 1e100 ]; | ||
2717 : | my @intervals = ( $interval, | ||
2718 : | map { &newick_branch_intervals( $_, $x ) } @$desc | ||
2719 : | ); | ||
2720 : | return wantarray ? @intervals : \@intervals; | ||
2721 : | } | ||
2722 : | |||
2723 : | |||
2724 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2725 : | # @ids = n_representative_branches( $n, @id_start_end ) | ||
2726 : | # @ids = n_representative_branches( $n, \@id_start_end ) | ||
2727 : | # \@ids = n_representative_branches( $n, @id_start_end ) | ||
2728 : | # \@ids = n_representative_branches( $n, \@id_start_end ) | ||
2729 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2730 : | sub n_representative_branches | ||
2731 : | { | ||
2732 : | my $n = shift; | ||
2733 : | # Sort intervals by start point: | ||
2734 : | my @unprocessed = sort { $a->[1] <=> $b->[1] } | ||
2735 : | ( @_ == 1 ) ? @{ $_[0] } : @_; | ||
2736 : | my @active = (); | ||
2737 : | my ( $interval, $current_point ); | ||
2738 : | foreach $interval ( @unprocessed ) | ||
2739 : | { | ||
2740 : | $current_point = $interval->[1]; | ||
2741 : | # Filter out intervals that have ended. This is N**2 in the number | ||
2742 : | # of representatives. Fixing this would require maintaining a sorted | ||
2743 : | # active list. | ||
2744 : | @active = grep { $_->[2] > $current_point } @active; | ||
2745 : | push @active, $interval; | ||
2746 : | last if ( @active >= $n ); | ||
2747 : | } | ||
2748 : | |||
2749 : | my @ids = map { $_->[0] } @active; | ||
2750 : | return wantarray() ? @ids : \@ids; | ||
2751 : | } | ||
2752 : | |||
2753 : | |||
2754 : | #=============================================================================== | ||
2755 : | # | ||
2756 : | golsen | 1.1 | # Tree writing and reading |
2757 : | # | ||
2758 : | #=============================================================================== | ||
2759 : | overbeek | 1.7 | # writeNewickTree( $tree ) |
2760 : | # writeNewickTree( $tree, $file ) | ||
2761 : | # writeNewickTree( $tree, \*FH ) | ||
2762 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
2763 : | sub writeNewickTree { | ||
2764 : | my ( $tree, $file ) = @_; | ||
2765 : | overbeek | 1.7 | my ( $fh, $close ) = open_output( $file ); |
2766 : | $fh or return; | ||
2767 : | print $fh ( strNewickTree( $tree ), "\n" ); | ||
2768 : | close $fh if $close; | ||
2769 : | golsen | 1.1 | } |
2770 : | |||
2771 : | |||
2772 : | #------------------------------------------------------------------------------- | ||
2773 : | # fwriteNewickTree( $file, $tree ) # Args reversed to writeNewickTree | ||
2774 : | #------------------------------------------------------------------------------- | ||
2775 : | sub fwriteNewickTree { writeNewickTree( $_[1], $_[0] ) } | ||
2776 : | |||
2777 : | |||
2778 : | #------------------------------------------------------------------------------- | ||
2779 : | # $treestring = strNewickTree( $tree ) | ||
2780 : | #------------------------------------------------------------------------------- | ||
2781 : | sub strNewickTree { | ||
2782 : | my $node = shift @_; | ||
2783 : | strNewickSubtree( $node, "" ) . ";"; | ||
2784 : | } | ||
2785 : | |||
2786 : | |||
2787 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2788 : | # $string = strNewickSubtree( $node, $prefix ) | ||
2789 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2790 : | sub strNewickSubtree { | ||
2791 : | my ( $node, $prefix ) = @_; | ||
2792 : | my $s; | ||
2793 : | |||
2794 : | $s = strNewickComments( newick_c1( $node ), $prefix ); | ||
2795 : | if ( $s ) { $prefix = " " } | ||
2796 : | |||
2797 : | my $ndesc; | ||
2798 : | if ( $ndesc = newick_n_desc( $node ) ) { | ||
2799 : | for (my $d = 1; $d <= $ndesc; $d++) { | ||
2800 : | $s .= ( ( $d == 1 ) ? $prefix . "(" : "," ) | ||
2801 : | . strNewickSubtree( newick_desc_i( $node, $d ), " " ); | ||
2802 : | } | ||
2803 : | |||
2804 : | $s .= ")" . strNewickComments( newick_c2( $node ), " " ); | ||
2805 : | $prefix = " "; | ||
2806 : | } | ||
2807 : | |||
2808 : | if ( defined( newick_lbl( $node ) ) && newick_lbl( $node ) ) { | ||
2809 : | $s .= $prefix | ||
2810 : | . q_newick_lbl( $node ) | ||
2811 : | . strNewickComments( newick_c3( $node ), " " ); | ||
2812 : | } | ||
2813 : | |||
2814 : | if ( defined( newick_x( $node ) ) ) { | ||
2815 : | $s .= ":" | ||
2816 : | . strNewickComments( newick_c4( $node ), " " ) | ||
2817 : | . sprintf( " %.6f", newick_x( $node ) ) | ||
2818 : | . strNewickComments( newick_c5( $node ), " " ); | ||
2819 : | } | ||
2820 : | |||
2821 : | $s; | ||
2822 : | } | ||
2823 : | |||
2824 : | |||
2825 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2826 : | # $string = strNewickComments( $clist, $prefix ) | ||
2827 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2828 : | sub strNewickComments { | ||
2829 : | my ( $clist, $prefix ) = @_; | ||
2830 : | array_ref( $clist ) && ( @$clist > 0 ) || return ""; | ||
2831 : | $prefix . "[" . join( "] [", @$clist ) . "]"; | ||
2832 : | } | ||
2833 : | |||
2834 : | |||
2835 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2836 : | # $quoted_label = q_newick_lbl( $label ) | ||
2837 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2838 : | sub q_newick_lbl { | ||
2839 : | my $lbl = newick_lbl( $_[0] ); | ||
2840 : | defined( $lbl ) && ( $lbl ne "" ) || return undef; | ||
2841 : | |||
2842 : | if ( $lbl =~ m/^[^][()_:;,]+$/ # Anything but []()_:;, | ||
2843 : | && $lbl !~ m/^'/ ) { # and does not start with ' | ||
2844 : | $lbl =~ s/ /_/g; # Recode blanks as _ | ||
2845 : | return $lbl; | ||
2846 : | } | ||
2847 : | |||
2848 : | else { | ||
2849 : | $lbl =~ s/'/''/g; # Double existing single quote marks | ||
2850 : | return q(') . $lbl . q('); # Wrap in single quote marks | ||
2851 : | } | ||
2852 : | } | ||
2853 : | |||
2854 : | |||
2855 : | #=============================================================================== | ||
2856 : | # $treestring = formatNewickTree( $tree ) | ||
2857 : | #=============================================================================== | ||
2858 : | sub formatNewickTree { | ||
2859 : | formatNewickSubtree( $_[0], "", "" ) . ";"; | ||
2860 : | } | ||
2861 : | |||
2862 : | |||
2863 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2864 : | # $string = formatNewickSubtree( $node, $prefix, $indent ) | ||
2865 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2866 : | sub formatNewickSubtree { | ||
2867 : | my ( $node, $prefix, $indent ) = @_; | ||
2868 : | my $s; | ||
2869 : | |||
2870 : | $s = formatNewickComments( newick_c1( $node ), $prefix, $indent ); | ||
2871 : | if ( $s ) { $prefix = "\n$indent" } | ||
2872 : | |||
2873 : | if ( my $ndesc = newick_n_desc( $node ) ) { | ||
2874 : | for (my $d = 1; $d <= $ndesc; $d++) { | ||
2875 : | $s .= ( ( $d == 1 ) ? $prefix . "(" : ",\n$indent " ) | ||
2876 : | . formatNewickSubtree( newick_desc_i( $node, $d ), " ", $indent . " " ); | ||
2877 : | } | ||
2878 : | |||
2879 : | $s .= "\n$indent)" . formatNewickComments( newick_c2( $node ), " ", $indent ); | ||
2880 : | $prefix = " "; | ||
2881 : | } | ||
2882 : | |||
2883 : | if ( defined( newick_lbl( $node ) ) && newick_lbl( $node ) ) { | ||
2884 : | $s .= $prefix | ||
2885 : | . q_newick_lbl( $node ) | ||
2886 : | . formatNewickComments( newick_c3( $node ), " ", $indent ); | ||
2887 : | } | ||
2888 : | |||
2889 : | if ( defined( newick_x( $node ) ) ) { | ||
2890 : | $s .= ":" | ||
2891 : | . formatNewickComments( newick_c4( $node ), " ", $indent ) | ||
2892 : | . sprintf(" %.6f", newick_x( $node ) ) | ||
2893 : | . formatNewickComments( newick_c5( $node ), " ", $indent ); | ||
2894 : | } | ||
2895 : | |||
2896 : | $s; | ||
2897 : | } | ||
2898 : | |||
2899 : | |||
2900 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2901 : | # $string = formatNewickComments( $clist, $prefix, $indent ) | ||
2902 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2903 : | sub formatNewickComments { | ||
2904 : | my ( $clist, $prefix, $indent ) = @_; | ||
2905 : | array_ref( $clist ) && @$clist || return ""; | ||
2906 : | $prefix . "[" . join( "] [", @$clist ) . "]"; | ||
2907 : | } | ||
2908 : | |||
2909 : | |||
2910 : | #=============================================================================== | ||
2911 : | overbeek | 1.7 | # $tree = read_newick_tree( $file ) # reads to a semicolon |
2912 : | # @trees = read_newick_trees( $file ) # reads to end of file | ||
2913 : | #=============================================================================== | ||
2914 : | |||
2915 : | sub read_newick_tree | ||
2916 : | { | ||
2917 : | my $file = shift; | ||
2918 : | my ( $fh, $close ) = open_input( $file ); | ||
2919 : | my $tree; | ||
2920 : | my @lines = (); | ||
2921 : | golsen | 1.9 | foreach ( <$fh> ) |
2922 : | overbeek | 1.7 | { |
2923 : | chomp; | ||
2924 : | push @lines, $_; | ||
2925 : | if ( /;/ ) | ||
2926 : | { | ||
2927 : | $tree = parse_newick_tree_str( join( ' ', @lines ) ); | ||
2928 : | last; | ||
2929 : | } | ||
2930 : | } | ||
2931 : | close $fh if $close; | ||
2932 : | |||
2933 : | $tree; | ||
2934 : | } | ||
2935 : | |||
2936 : | |||
2937 : | sub read_newick_trees | ||
2938 : | { | ||
2939 : | my $file = shift; | ||
2940 : | my ( $fh, $close ) = open_input( $file ); | ||
2941 : | my @trees = (); | ||
2942 : | my @lines = (); | ||
2943 : | golsen | 1.9 | foreach ( <$fh> ) |
2944 : | overbeek | 1.7 | { |
2945 : | chomp; | ||
2946 : | push @lines, $_; | ||
2947 : | if ( /;/ ) | ||
2948 : | { | ||
2949 : | push @trees, parse_newick_tree_str( join( ' ', @lines ) ); | ||
2950 : | @lines = () | ||
2951 : | } | ||
2952 : | } | ||
2953 : | close $fh if $close; | ||
2954 : | |||
2955 : | @trees; | ||
2956 : | } | ||
2957 : | |||
2958 : | |||
2959 : | #=============================================================================== | ||
2960 : | golsen | 1.1 | # Tree reader adapted from the C language reader in fastDNAml |
2961 : | # | ||
2962 : | # $tree = parse_newick_tree_str( $string ) | ||
2963 : | #=============================================================================== | ||
2964 : | sub parse_newick_tree_str { | ||
2965 : | my $s = shift @_; | ||
2966 : | |||
2967 : | my ( $ind, $rootnode ) = parse_newick_subtree( $s, 0 ); | ||
2968 : | if ( substr( $s, $ind, 1 ) ne ";") { warn "warning: tree missing ';'\n" } | ||
2969 : | $rootnode; | ||
2970 : | } | ||
2971 : | |||
2972 : | |||
2973 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2974 : | # Read a subtrees recursively (everything of tree but a semicolon) | ||
2975 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2976 : | sub parse_newick_subtree { | ||
2977 : | my ( $s, $ind ) = @_; | ||
2978 : | |||
2979 : | my $newnode = []; | ||
2980 : | my @dlist = (); | ||
2981 : | my ( $lbl, $x, $c1, $c2, $c3, $c4, $c5 ); | ||
2982 : | golsen | 1.9 | |
2983 : | golsen | 1.1 | ( $ind, $c1 ) = getNextTreeChar( $s, $ind ); # Comment 1 |
2984 : | if ( ! defined( $ind ) ) { treeParseError( "missing subtree" ) } | ||
2985 : | if ( $c1 && @$c1 ) { set_newick_c1( $newnode, $c1 ) } | ||
2986 : | |||
2987 : | if ( substr( $s, $ind, 1 ) eq "(" ) { # New internal node | ||
2988 : | while ( ! @dlist || ( substr( $s, $ind, 1 ) eq "," ) ) { | ||
2989 : | my $desc; | ||
2990 : | ( $ind, $desc ) = parse_newick_subtree( $s, $ind+1 ); | ||
2991 : | if (! $ind) { return () } | ||
2992 : | push @dlist, $desc; | ||
2993 : | } | ||
2994 : | if ( substr( $s, $ind, 1 ) ne ")" ) { treeParseError( "missing ')'" ) } | ||
2995 : | |||
2996 : | ( $ind, $c2 ) = getNextTreeChar( $s, $ind+1 ); # Comment 2 | ||
2997 : | if ( $c2 && @$c2 ) { set_newick_c2( $newnode, $c2 ) } | ||
2998 : | ( $ind, $lbl ) = parseTreeNodeLabel( $s, $ind ); # Node label | ||
2999 : | } | ||
3000 : | |||
3001 : | elsif ( substr( $s, $ind, 1 ) =~ /[^][(,):;]/ ) { # New tip | ||
3002 : | ( $ind, $lbl ) = parseTreeNodeLabel( $s, $ind ); # Tip label | ||
3003 : | if (! $ind) { return () } | ||
3004 : | } | ||
3005 : | |||
3006 : | @dlist || $lbl || treeParseError( "no descendant list or label" ); | ||
3007 : | |||
3008 : | if ( @dlist ) { set_newick_desc_ref( $newnode, \@dlist ) } | ||
3009 : | if ( $lbl ) { set_newick_lbl( $newnode, $lbl ) } | ||
3010 : | |||
3011 : | ( $ind, $c3 ) = getNextTreeChar( $s, $ind ); # Comment 3 | ||
3012 : | if ( $c3 && @$c3 ) { set_newick_c3( $newnode, $c3 ) } | ||
3013 : | |||
3014 : | if (substr( $s, $ind, 1 ) eq ":") { # Branch length | ||
3015 : | ( $ind, $c4 ) = getNextTreeChar( $s, $ind+1 ); # Comment 4 | ||
3016 : | if ( $c4 && @$c4 ) { set_newick_c4( $newnode, $c4 ) } | ||
3017 : | ( $ind, $x ) = parseBranchLength( $s, $ind ); | ||
3018 : | if ( defined( $x ) ) { set_newick_x( $newnode, $x ) } | ||
3019 : | ( $ind, $c5 ) = getNextTreeChar( $s, $ind ); # Comment 5 | ||
3020 : | if ( $c5 && @$c5 ) { set_newick_c5( $newnode, $c5 ) } | ||
3021 : | } | ||
3022 : | |||
3023 : | ( $ind, $newnode ); | ||
3024 : | } | ||
3025 : | |||
3026 : | |||
3027 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3028 : | # Read a Newick tree label | ||
3029 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3030 : | sub parseTreeNodeLabel { # Empty string is permitted | ||
3031 : | my ( $s, $ind ) = @_; | ||
3032 : | my ( $lbl, $c ); | ||
3033 : | |||
3034 : | if ( substr( $s, $ind, 1 ) eq "'") { | ||
3035 : | my $ind1 = ++$ind; | ||
3036 : | |||
3037 : | while ( ) { | ||
3038 : | if ( ! defined( $c = substr( $s, $ind, 1 ) ) || $c eq "" ) { | ||
3039 : | treeParseError( "missing close quote on label '" . substr( $s, $ind1 ) . "'" ) | ||
3040 : | } | ||
3041 : | elsif ( $c ne "'" ) { $ind++ } | ||
3042 : | elsif ( substr( $s, $ind, 2 ) eq "''" ) { $ind += 2 } | ||
3043 : | else { last } | ||
3044 : | } | ||
3045 : | |||
3046 : | $lbl = substr( $s, $ind1, $ind-$ind1 ); | ||
3047 : | $lbl =~ s/''/'/g; | ||
3048 : | $ind++; | ||
3049 : | } | ||
3050 : | |||
3051 : | else { | ||
3052 : | my $ind1 = $ind; | ||
3053 : | while ( defined( $c = substr($s, $ind, 1) ) && $c ne "" && $c !~ /[][\s(,):;]/ ) { $ind++ } | ||
3054 : | $lbl = substr( $s, $ind1, $ind-$ind1 ); | ||
3055 : | $lbl =~ s/_/ /g; | ||
3056 : | } | ||
3057 : | |||
3058 : | ( $ind, $lbl ); | ||
3059 : | } | ||
3060 : | |||
3061 : | |||
3062 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3063 : | # Read a Newick tree branch length | ||
3064 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3065 : | sub parseBranchLength { | ||
3066 : | my ( $s, $ind ) = @_; | ||
3067 : | |||
3068 : | my $c = substr( $s, $ind, 1 ); | ||
3069 : | |||
3070 : | my $sign = ( $c ne "-" ) ? 1 : -1; # Sign | ||
3071 : | if ( $c =~ /[-+]/ ) { $c = substr( $s, ++$ind, 1 ) } | ||
3072 : | |||
3073 : | if ( $c !~ /^[.0-9]$/ ) { # Allows starting with decimal | ||
3074 : | treeParseError( "invalid branch length character '$c'" ) | ||
3075 : | } | ||
3076 : | |||
3077 : | my $v = 0; | ||
3078 : | while ( $c =~ /[0-9]/ ) { # Whole number | ||
3079 : | $v = 10 * $v + $c; | ||
3080 : | $c = substr( $s, ++$ind, 1 ); | ||
3081 : | } | ||
3082 : | |||
3083 : | if ( $c eq "." ) { # Fraction | ||
3084 : | my $f = 0.1; | ||
3085 : | $c = substr( $s, ++$ind, 1 ); | ||
3086 : | while ( $c =~ /[0-9]/ ) { | ||
3087 : | $v += $f * $c; | ||
3088 : | $f *= 0.1; | ||
3089 : | $c = substr( $s, ++$ind, 1 ); | ||
3090 : | } | ||
3091 : | } | ||
3092 : | |||
3093 : | $v *= $sign; | ||
3094 : | |||
3095 : | if ( $c =~ /[dDeEgG]/ ) { # Exponent | ||
3096 : | $c = substr( $s, ++$ind, 1 ); | ||
3097 : | my $esign = ( $c ne "-" ) ? 1 : -1; | ||
3098 : | if ( $c =~ /^[-+]$/ ) { $c = substr( $s, ++$ind, 1 ) } | ||
3099 : | if ( $c !~ /^[0-9]$/ ) { | ||
3100 : | treeParseError( "missing branch length exponent '$c'" ) | ||
3101 : | } | ||
3102 : | |||
3103 : | my $e = 0; | ||
3104 : | while ( $c =~ /[0-9]/ ) { | ||
3105 : | $e = 10 * $e + $c; | ||
3106 : | $c = substr( $s, ++$ind, 1 ); | ||
3107 : | } | ||
3108 : | $e *= $esign; | ||
3109 : | $v *= 10**$e; | ||
3110 : | } | ||
3111 : | |||
3112 : | ( $ind, $v ); | ||
3113 : | } | ||
3114 : | |||
3115 : | |||
3116 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3117 : | # ( $index, /@commentlist ) = getNextTreeChar( $string, $index ) | ||
3118 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3119 : | sub getNextTreeChar { # Move to next nonblank, noncomment character | ||
3120 : | my ( $s, $ind ) = @_; | ||
3121 : | |||
3122 : | my @clist = (); | ||
3123 : | |||
3124 : | # Skip white space | ||
3125 : | if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) } | ||
3126 : | |||
3127 : | # Loop while it is a comment: | ||
3128 : | while ( substr( $s, $ind, 1 ) eq "[" ) { | ||
3129 : | $ind++; | ||
3130 : | |||
3131 : | # Find end | ||
3132 : | if ( substr( $s, $ind ) !~ /^([^]]*)\]/ ) { | ||
3133 : | treeParseError( "comment missing closing bracket '[" | ||
3134 : | . substr( $s, $ind ) . "'" ) | ||
3135 : | } | ||
3136 : | my $comment = $1; | ||
3137 : | |||
3138 : | # Save if it includes any "text" | ||
3139 : | if ( $comment =~ m/\S/ ) { push @clist, $comment } | ||
3140 : | |||
3141 : | $ind += length( $comment ) + 1; # Comment plus closing bracket | ||
3142 : | |||
3143 : | # Skip white space | ||
3144 : | if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) } | ||
3145 : | } | ||
3146 : | |||
3147 : | ( $ind, @clist ? \@clist : undef ) | ||
3148 : | } | ||
3149 : | |||
3150 : | |||
3151 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3152 : | # treeParseError( $message ) | ||
3153 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3154 : | sub treeParseError { die "Error: parse_newick_subtree: " . $_[0] . "\n" } | ||
3155 : | |||
3156 : | |||
3157 : | #=============================================================================== | ||
3158 : | # Make a printer plot of a tree: | ||
3159 : | # | ||
3160 : | # $node newick tree root node | ||
3161 : | overbeek | 1.4 | # $file undef (= \*STDOUT), \*STDOUT, \*STDERR, or a file name. |
3162 : | golsen | 1.1 | # $width the approximate characters for the tree without labels |
3163 : | # $min_dx the minimum horizontal branch length | ||
3164 : | # $dy the vertical space per taxon | ||
3165 : | # | ||
3166 : | overbeek | 1.4 | # printer_plot_newick( $node, $file (D=\*STDOUT), $width (D=68), $min_dx (D=2), $dy (D=1) ) |
3167 : | golsen | 1.1 | #=============================================================================== |
3168 : | sub printer_plot_newick { | ||
3169 : | my ( $node, $file, $width, $min_dx, $dy ) = @_; | ||
3170 : | |||
3171 : | overbeek | 1.7 | my ( $fh, $close ) = open_output( $file ); |
3172 : | $fh or return; | ||
3173 : | golsen | 1.1 | |
3174 : | print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n"; | ||
3175 : | if ( $close ) { close $fh } | ||
3176 : | } | ||
3177 : | |||
3178 : | |||
3179 : | #=============================================================================== | ||
3180 : | # Make a text plot of a tree: | ||
3181 : | # | ||
3182 : | # $node newick tree root node | ||
3183 : | # $width the approximate characters for the tree without labels | ||
3184 : | # $min_dx the minimum horizontal branch length | ||
3185 : | # $dy the vertical space per taxon | ||
3186 : | # | ||
3187 : | # @textlines = text_plot_newick( $node, $width (D=68), $min_dx (D=2), $dy (D=1) ) | ||
3188 : | #=============================================================================== | ||
3189 : | sub text_plot_newick { | ||
3190 : | my ( $node, $width, $min_dx, $dy ) = @_; | ||
3191 : | array_ref( $node ) || die "Bad node passed to text_plot_newick\n"; | ||
3192 : | defined( $min_dx ) and ( $min_dx >= 0 ) or $min_dx = 2; | ||
3193 : | defined( $dy ) and ( $dy >= 1 ) or $dy = 1; | ||
3194 : | defined( $width ) or $width = 68; | ||
3195 : | |||
3196 : | $min_dx = int( $min_dx ); | ||
3197 : | $dy = int( $dy ); | ||
3198 : | golsen | 1.5 | my $x_scale = $width / ( newick_max_X( $node ) || 1 ); # Div by zero caught by RAE |
3199 : | golsen | 1.1 | |
3200 : | my $hash = {}; | ||
3201 : | layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy ); | ||
3202 : | |||
3203 : | # dump_tree_hash( $node, $hash ); exit; | ||
3204 : | |||
3205 : | # Generate the lines of the tree one by one: | ||
3206 : | |||
3207 : | my ( $y1, $y2 ) = @{ $hash->{ $node } }; | ||
3208 : | map { text_tree_row( $node, $hash, $_, "", "+" ) } ( $y1 .. $y2 ); | ||
3209 : | } | ||
3210 : | |||
3211 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3212 : | # ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy ) | ||
3213 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3214 : | sub layout_printer_plot { | ||
3215 : | my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy ) = @_; | ||
3216 : | array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n"; | ||
3217 : | hash_ref( $hash ) || die "Bad hash ref passed to layout_printer_plot\n"; | ||
3218 : | |||
3219 : | my $dx = newick_x( $node ); | ||
3220 : | if ( defined( $dx ) ) { | ||
3221 : | $dx *= $x_scale; | ||
3222 : | $dx >= $min_dx or $dx = $min_dx; | ||
3223 : | } | ||
3224 : | else { | ||
3225 : | $dx = ( $x0 > 0 ) ? $min_dx : 0; | ||
3226 : | } | ||
3227 : | $dx = int( $dx + 0.4999 ); | ||
3228 : | |||
3229 : | my ( $x, $xmax, $y, $ymax, $y1, $y2, $yn1, $yn2 ); | ||
3230 : | |||
3231 : | $x = $x0 + $dx; | ||
3232 : | $y1 = int( $y0 + 0.5 * $dy + 0.4999 ); | ||
3233 : | my @dl = newick_desc_list( $node ); | ||
3234 : | |||
3235 : | if ( ! @dl ) { # A tip | ||
3236 : | $xmax = $x; | ||
3237 : | $y = $yn1 = $yn2 = $y2 = $y1; | ||
3238 : | $ymax = $y + 0.5 * $dy; | ||
3239 : | } | ||
3240 : | |||
3241 : | else { # A subtree | ||
3242 : | $xmax = -1; | ||
3243 : | my $xmaxi; | ||
3244 : | my $yi; | ||
3245 : | my @ylist = (); | ||
3246 : | $ymax = $y0; | ||
3247 : | |||
3248 : | foreach ( @dl ) { | ||
3249 : | ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy ); | ||
3250 : | push @ylist, $yi; | ||
3251 : | if ( $xmaxi > $xmax ) { $xmax = $xmaxi } | ||
3252 : | } | ||
3253 : | |||
3254 : | # Use of y-list is overkill for saving first and last values, | ||
3255 : | # but eases implimentation of alternative y-value calculations. | ||
3256 : | |||
3257 : | $yn1 = $ylist[ 0]; | ||
3258 : | $yn2 = $ylist[-1]; | ||
3259 : | $y = int( 0.5 * ( $yn1 + $yn2 ) + 0.4999 ); | ||
3260 : | } | ||
3261 : | |||
3262 : | $y2 = int( $ymax - 0.5 * $dy + 0.4999 ); | ||
3263 : | |||
3264 : | $hash->{ $node } = [ $y1, $y2, $x0, $x, $y, $yn1, $yn2 ]; | ||
3265 : | ( $xmax, $ymax, $y ); | ||
3266 : | } | ||
3267 : | |||
3268 : | |||
3269 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3270 : | # Debug routine | ||
3271 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3272 : | sub dump_tree { | ||
3273 : | my ( $node, $prefix ) = @_; | ||
3274 : | defined( $prefix ) or $prefix = ""; | ||
3275 : | print STDERR $prefix, join(", ", @$node), "\n"; | ||
3276 : | my @dl = $node->[0] ? @{$node->[0]} : (); | ||
3277 : | foreach ( @dl ) { dump_tree( $_, $prefix . " " ) } | ||
3278 : | $prefix or print STDERR "\n"; | ||
3279 : | } | ||
3280 : | |||
3281 : | |||
3282 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3283 : | # Debug routine | ||
3284 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3285 : | sub dump_tree_hash { | ||
3286 : | my ( $node, $hash, $prefix ) = @_; | ||
3287 : | defined( $prefix ) or print STDERR "node; [ y1, y2, x0, x, y, yn1, yn2 ]\n" and $prefix = ""; | ||
3288 : | print STDERR $prefix, join(", ", @$node), "; ", join(", ", @{ $hash->{ $node } } ), "\n"; | ||
3289 : | my @dl = $node->[0] ? @{$node->[0]} : (); | ||
3290 : | foreach (@dl) { dump_tree_hash( $_, $hash, $prefix . " " ) } | ||
3291 : | } | ||
3292 : | |||
3293 : | |||
3294 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3295 : | # $line = text_tree_row( $node, $hash, $row, $line, $symb ) | ||
3296 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3297 : | sub text_tree_row { | ||
3298 : | my ( $node, $hash, $row, $line, $symb ) = @_; | ||
3299 : | |||
3300 : | my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } }; | ||
3301 : | if ( $row < $y1 || $row > $y2 ) { return $line } | ||
3302 : | |||
3303 : | if ( length( $line ) < $x0 ) { $line .= " " x ( $x0 - length( $line ) ) } | ||
3304 : | |||
3305 : | if ( $row == $y ) { | ||
3306 : | $line = substr( $line, 0, $x0 ) . $symb . (( $x > $x0 ) ? "-" x ($x - $x0) : ""); | ||
3307 : | } | ||
3308 : | |||
3309 : | elsif ( $row > $yn1 && $row < $yn2 ) { | ||
3310 : | if ( length( $line ) < $x ) { $line .= " " x ( $x - length( $line ) ) . "|" } | ||
3311 : | else { substr( $line, $x ) = "|" } | ||
3312 : | } | ||
3313 : | |||
3314 : | my @dl = newick_desc_list( $node ); | ||
3315 : | |||
3316 : | if ( @dl < 1 ) { | ||
3317 : | $line .= " " . $node->[1]; | ||
3318 : | } | ||
3319 : | |||
3320 : | else { | ||
3321 : | my @list = map { [ $_, "+" ] } @dl; # Print symbol for line | ||
3322 : | $list[ 0]->[1] = "/"; | ||
3323 : | $list[-1]->[1] = "\\"; | ||
3324 : | |||
3325 : | foreach ( @list ) { | ||
3326 : | my ( $n, $s ) = @$_; | ||
3327 : | if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) { | ||
3328 : | $line = text_tree_row( $n, $hash, $row, $line, $s ); | ||
3329 : | } | ||
3330 : | } | ||
3331 : | |||
3332 : | if ( $row == $y ) { substr( $line, $x, 1 ) = "+" } | ||
3333 : | } | ||
3334 : | |||
3335 : | return $line; | ||
3336 : | } | ||
3337 : | |||
3338 : | |||
3339 : | overbeek | 1.7 | #=============================================================================== |
3340 : | # Open an input file stream: | ||
3341 : | # | ||
3342 : | # ( $handle, undef ) = open_input( ); # \*STDIN | ||
3343 : | # ( $handle, undef ) = open_input( \*FH ); | ||
3344 : | # ( $handle, 1 ) = open_input( $file ); # need to close $handle | ||
3345 : | # | ||
3346 : | #=============================================================================== | ||
3347 : | sub open_input | ||
3348 : | { | ||
3349 : | my $file = shift; | ||
3350 : | my $fh; | ||
3351 : | if ( ! defined( $file ) ) { return ( \*STDIN ) } | ||
3352 : | elsif ( ref( $file ) eq 'GLOB' ) { return ( $file ) } | ||
3353 : | elsif ( open( $fh, "<$file" ) ) { return ( $fh, 1 ) } # Need to close | ||
3354 : | |||
3355 : | print STDERR "gjonewick::open_input could not open '$file' for reading\n"; | ||
3356 : | return undef; | ||
3357 : | } | ||
3358 : | |||
3359 : | |||
3360 : | #=============================================================================== | ||
3361 : | # Open an output file stream: | ||
3362 : | # | ||
3363 : | # ( $handle, undef ) = open_output( ); # \*STDOUT | ||
3364 : | # ( $handle, undef ) = open_output( \*FH ); | ||
3365 : | # ( $handle, 1 ) = open_output( $file ); # need to close $handle | ||
3366 : | # | ||
3367 : | #=============================================================================== | ||
3368 : | sub open_output | ||
3369 : | { | ||
3370 : | my $file = shift; | ||
3371 : | my $fh; | ||
3372 : | if ( ! defined( $file ) ) { return ( \*STDOUT ) } | ||
3373 : | elsif ( ref( $file ) eq 'GLOB' ) { return ( $file ) } | ||
3374 : | elsif ( ( open $fh, ">$file" ) ) { return ( $fh, 1 ) } # Need to close | ||
3375 : | |||
3376 : | print STDERR "gjonewick::open_output could not open '$file' for writing\n"; | ||
3377 : | return undef; | ||
3378 : | } | ||
3379 : | |||
3380 : | golsen | 1.9 | |
3381 : | #=============================================================================== | ||
3382 : | # Some subroutines copied from gjolists | ||
3383 : | #=============================================================================== | ||
3384 : | # Return the common prefix of two lists: | ||
3385 : | # | ||
3386 : | # @common = common_prefix( \@list1, \@list2 ) | ||
3387 : | #----------------------------------------------------------------------------- | ||
3388 : | sub common_prefix | ||
3389 : | { | ||
3390 : | my ($l1, $l2) = @_; | ||
3391 : | ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n"; | ||
3392 : | ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n"; | ||
3393 : | |||
3394 : | my $i = 0; | ||
3395 : | my $l1_i; | ||
3396 : | while ( defined( $l1_i = $l1->[$i] ) && $l1_i eq $l2->[$i] ) { $i++ } | ||
3397 : | |||
3398 : | return @$l1[ 0 .. ($i-1) ]; # perl handles negative range | ||
3399 : | } | ||
3400 : | |||
3401 : | |||
3402 : | #----------------------------------------------------------------------------- | ||
3403 : | # Return the unique suffixes of each of two lists: | ||
3404 : | # | ||
3405 : | # ( \@suffix1, \@suffix2 ) = unique_suffixes( \@list1, \@list2 ) | ||
3406 : | #----------------------------------------------------------------------------- | ||
3407 : | sub unique_suffixes | ||
3408 : | { | ||
3409 : | my ($l1, $l2) = @_; | ||
3410 : | ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n"; | ||
3411 : | ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n"; | ||
3412 : | |||
3413 : | my $i = 0; | ||
3414 : | my @l1 = @$l1; | ||
3415 : | my @l2 = @$l2; | ||
3416 : | my $l1_i; | ||
3417 : | while ( defined( $l1_i = $l1[$i] ) && $l1_i eq $l2[$i] ) { $i++ } | ||
3418 : | |||
3419 : | splice @l1, 0, $i; | ||
3420 : | splice @l2, 0, $i; | ||
3421 : | return ( \@l1, \@l2 ); | ||
3422 : | } | ||
3423 : | |||
3424 : | |||
3425 : | #------------------------------------------------------------------------------- | ||
3426 : | # List of values duplicated in a list (stable in order by second occurance): | ||
3427 : | # | ||
3428 : | # @dups = duplicates( @list ) | ||
3429 : | #------------------------------------------------------------------------------- | ||
3430 : | sub duplicates | ||
3431 : | { | ||
3432 : | my %cnt = (); | ||
3433 : | grep { ++$cnt{$_} == 2 } @_; | ||
3434 : | } | ||
3435 : | |||
3436 : | |||
3437 : | #------------------------------------------------------------------------------- | ||
3438 : | # Randomize the order of a list: | ||
3439 : | # | ||
3440 : | # @random = random_order( @list ) | ||
3441 : | #------------------------------------------------------------------------------- | ||
3442 : | sub random_order | ||
3443 : | { | ||
3444 : | my ( $i, $j ); | ||
3445 : | for ( $i = @_ - 1; $i > 0; $i-- ) | ||
3446 : | { | ||
3447 : | $j = int( ($i+1) * rand() ); | ||
3448 : | ( $_[$i], $_[$j] ) = ( $_[$j], $_[$i] ); # Interchange i and j | ||
3449 : | } | ||
3450 : | |||
3451 : | @_; | ||
3452 : | } | ||
3453 : | |||
3454 : | |||
3455 : | #----------------------------------------------------------------------------- | ||
3456 : | # Intersection of two or more sets: | ||
3457 : | # | ||
3458 : | # @intersection = intersection( \@set1, \@set2, ... ) | ||
3459 : | #----------------------------------------------------------------------------- | ||
3460 : | sub intersection | ||
3461 : | { | ||
3462 : | my $set = shift; | ||
3463 : | my @intersection = @$set; | ||
3464 : | |||
3465 : | foreach $set ( @_ ) | ||
3466 : | { | ||
3467 : | my %set = map { $_ => 1 } @$set; | ||
3468 : | @intersection = grep { exists $set{ $_ } } @intersection; | ||
3469 : | } | ||
3470 : | |||
3471 : | @intersection; | ||
3472 : | } | ||
3473 : | |||
3474 : | |||
3475 : | #----------------------------------------------------------------------------- | ||
3476 : | # Elements in set 1, but not set 2: | ||
3477 : | # | ||
3478 : | # @difference = set_difference( \@set1, \@set2 ) | ||
3479 : | #----------------------------------------------------------------------------- | ||
3480 : | sub set_difference | ||
3481 : | { | ||
3482 : | my ($set1, $set2) = @_; | ||
3483 : | my %set2 = map { $_ => 1 } @$set2; | ||
3484 : | grep { ! ( exists $set2{$_} ) } @$set1; | ||
3485 : | } | ||
3486 : | |||
3487 : | |||
3488 : | golsen | 1.1 | 1; |
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |