Parent Directory
|
Revision Log
Revision 1.12 - (view) (download) (as text)
1 : | olson | 1.2 | # |
2 : | golsen | 1.8 | # Copyright (c) 2003-2007 University of Chicago and Fellowship |
3 : | olson | 1.2 | # for Interpretations of Genomes. All Rights Reserved. |
4 : | # | ||
5 : | # This file is part of the SEED Toolkit. | ||
6 : | golsen | 1.9 | # |
7 : | olson | 1.2 | # The SEED Toolkit is free software. You can redistribute |
8 : | # it and/or modify it under the terms of the SEED Toolkit | ||
9 : | golsen | 1.9 | # Public License. |
10 : | olson | 1.2 | # |
11 : | # You should have received a copy of the SEED Toolkit Public License | ||
12 : | # along with this program; if not write to the University of Chicago | ||
13 : | # at info@ci.uchicago.edu or the Fellowship for Interpretation of | ||
14 : | # Genomes at veronika@thefig.info or download a copy from | ||
15 : | # http://www.theseed.org/LICENSE.TXT. | ||
16 : | # | ||
17 : | |||
18 : | golsen | 1.1 | package gjonewicklib; |
19 : | |||
20 : | #=============================================================================== | ||
21 : | # perl functions for dealing with trees | ||
22 : | # | ||
23 : | # Usage: | ||
24 : | # use gjonewicklib | ||
25 : | # | ||
26 : | # | ||
27 : | #=============================================================================== | ||
28 : | # Tree data structures: | ||
29 : | #=============================================================================== | ||
30 : | # | ||
31 : | # Elements in newick text file are: | ||
32 : | # | ||
33 : | # [c1] ( desc_1, desc_2, ... ) [c2] label [c3] : [c4] x [c5] | ||
34 : | # | ||
35 : | # Note that: | ||
36 : | golsen | 1.9 | # |
37 : | golsen | 1.1 | # Comment list 1 can exist on any subtree, but its association with |
38 : | # tree components can be upset by rerooting | ||
39 : | # Comment list 2 cannot exist without a descendant list | ||
40 : | # Comment list 3 cannot exist without a label | ||
41 : | # Comment lists 4 and 5 cannot exist without a branch length | ||
42 : | # | ||
43 : | # Elements in perl representation are: | ||
44 : | # | ||
45 : | # $tree = \@rootnode; | ||
46 : | # | ||
47 : | overbeek | 1.7 | # $node = [ \@desc, # reference to list of descendants |
48 : | golsen | 1.1 | # $label, # node label |
49 : | # $x, # branch length | ||
50 : | # \@c1, # reference to comment list 1 | ||
51 : | # \@c2, # reference to comment list 2 | ||
52 : | # \@c3, # reference to comment list 3 | ||
53 : | # \@c4, # reference to comment list 4 | ||
54 : | # \@c5 # reference to comment list 5 | ||
55 : | overbeek | 1.7 | # ] |
56 : | golsen | 1.1 | # |
57 : | # At present, no routine tests or enforces the length of the list (a single | ||
58 : | # element list could be a valid internal node). | ||
59 : | # | ||
60 : | # All empty lists can be [] or undef | ||
61 : | # | ||
62 : | # Putting the comments at the end allows a shorter list nearly all the | ||
63 : | # time, but is different from the prolog representation. | ||
64 : | # | ||
65 : | # | ||
66 : | overbeek | 1.7 | # Ross Overbeek has a different tree node structure: |
67 : | # | ||
68 : | # $node = [ Label, | ||
69 : | # DistanceToParent, | ||
70 : | # [ ParentPointer, ChildPointer1, ... ], | ||
71 : | # [ Name1\tVal1, Name2\tVal2, ... ] | ||
72 : | # ] | ||
73 : | # | ||
74 : | # So: | ||
75 : | # | ||
76 : | #=============================================================================== | ||
77 : | # Tree format interconversion: | ||
78 : | #=============================================================================== | ||
79 : | # | ||
80 : | golsen | 1.9 | # $bool = is_overbeek_tree( $tree ) |
81 : | # $bool = is_gjonewick_tree( $tree ) | ||
82 : | # | ||
83 : | overbeek | 1.7 | # $gjonewick = overbeek_to_gjonewick( $overbeek ) |
84 : | # $overbeek = gjonewick_to_overbeek( $gjonewick ) | ||
85 : | # | ||
86 : | golsen | 1.1 | #=============================================================================== |
87 : | # Tree data extraction: | ||
88 : | #=============================================================================== | ||
89 : | # | ||
90 : | # $listref = newick_desc_ref( $noderef ) | ||
91 : | # $label = newick_lbl( $noderef ) | ||
92 : | # $x = newick_x( $noderef ) | ||
93 : | # $listref = newick_c1( $noderef ) | ||
94 : | # $listref = newick_c2( $noderef ) | ||
95 : | # $listref = newick_c3( $noderef ) | ||
96 : | # $listref = newick_c4( $noderef ) | ||
97 : | # $listref = newick_c5( $noderef ) | ||
98 : | # | ||
99 : | # @desclist = newick_desc_list( $noderef ) | ||
100 : | # $n = newick_n_desc( $noderef ) | ||
101 : | # $descref = newick_desc_i( $noderef, $i ) # 1-based numbering | ||
102 : | # $bool = newick_is_tip( $noderef ) | ||
103 : | # | ||
104 : | # set_newick_desc_ref( $noderef, $listref ) | ||
105 : | # set_newick_lbl( $noderef, $label ) | ||
106 : | # set_newick_x( $noderef, $x ) | ||
107 : | # set_newick_c1( $noderef, $listref ) | ||
108 : | # set_newick_c2( $noderef, $listref ) | ||
109 : | # set_newick_c3( $noderef, $listref ) | ||
110 : | # set_newick_c4( $noderef, $listref ) | ||
111 : | # set_newick_c5( $noderef, $listref ) | ||
112 : | # set_newick_desc_list( $noderef, @desclist ) | ||
113 : | golsen | 1.9 | # set_newick_desc_i( $noderef1, $i, $noderef2 ) # 1-based numbering |
114 : | golsen | 1.8 | # |
115 : | # $bool = newick_is_valid( $noderef ) # verify that tree is valid | ||
116 : | golsen | 1.1 | # |
117 : | # $bool = newick_is_rooted( $noderef ) # 2 branches from root | ||
118 : | # $bool = newick_is_unrooted( $noderef ) # 3 or more branches from root | ||
119 : | # $bool = newick_is_tip_rooted( $noderef ) # 1 branch from root | ||
120 : | # $bool = newick_is_bifurcating( $noderef ) | ||
121 : | # | ||
122 : | # $n = newick_tip_count( $noderef ) | ||
123 : | # @tiprefs = newick_tip_ref_list( $noderef ) | ||
124 : | golsen | 1.9 | # \@tiprefs = newick_tip_ref_list( $noderef ) |
125 : | golsen | 1.1 | # @tips = newick_tip_list( $noderef ) |
126 : | golsen | 1.9 | # \@tips = newick_tip_list( $noderef ) |
127 : | # | ||
128 : | golsen | 1.1 | # $tipref = newick_first_tip_ref( $noderef ) |
129 : | # $tip = newick_first_tip( $noderef ) | ||
130 : | golsen | 1.9 | # |
131 : | golsen | 1.1 | # @tips = newick_duplicated_tips( $noderef ) |
132 : | golsen | 1.9 | # \@tips = newick_duplicated_tips( $noderef ) |
133 : | # | ||
134 : | golsen | 1.1 | # $bool = newick_tip_in_tree( $noderef, $tipname ) |
135 : | golsen | 1.9 | # |
136 : | golsen | 1.1 | # @tips = newick_shared_tips( $tree1, $tree2 ) |
137 : | golsen | 1.9 | # \@tips = newick_shared_tips( $tree1, $tree2 ) |
138 : | golsen | 1.1 | # |
139 : | # $length = newick_tree_length( $noderef ) | ||
140 : | golsen | 1.9 | # |
141 : | # %tip_distances = newick_tip_distances( $noderef ) | ||
142 : | # \%tip_distances = newick_tip_distances( $noderef ) | ||
143 : | # | ||
144 : | golsen | 1.1 | # $xmax = newick_max_X( $noderef ) |
145 : | # ( $tipref, $xmax ) = newick_most_distant_tip_ref( $noderef ) | ||
146 : | golsen | 1.9 | # ( $tipname, $xmax ) = newick_most_distant_tip_name( $noderef ) |
147 : | golsen | 1.1 | # |
148 : | golsen | 1.8 | # Tree tip insertion point (tip is on branch of length x that |
149 : | # is inserted into branch connecting node1 and node2, a distance | ||
150 : | # x1 from node1 and x2 from node2): | ||
151 : | # | ||
152 : | golsen | 1.9 | # [ $node1, $x1, $node2, $x2, $x ] = newick_tip_insertion_point( $tree, $tip ) |
153 : | golsen | 1.8 | # |
154 : | # Standardized label for a node in terms of intersection of 3 lowest sorting | ||
155 : | # tips (sort is lower case): | ||
156 : | # | ||
157 : | golsen | 1.9 | # @TipOrTips = std_node_name( $tree, $node ) |
158 : | golsen | 1.8 | # |
159 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
160 : | # Paths from root of tree: | ||
161 : | #------------------------------------------------------------------------------- | ||
162 : | # | ||
163 : | # Path descriptions are of form: | ||
164 : | # ( $node0, $i0, $node1, $i1, $node2, $i2, ..., $nodeN ) | ||
165 : | # () is returned upon failure | ||
166 : | # | ||
167 : | # @path = path_to_tip( $treenode, $tipname ) | ||
168 : | # @path = path_to_named_node( $treenode, $nodename ) | ||
169 : | # @path = path_to_node_ref( $treenode, $noderef ) | ||
170 : | # | ||
171 : | # @path = path_to_node( $node, $tip1, $tip2, $tip3 ) # 3 tip names | ||
172 : | # @path = path_to_node( $node, [ $tip1, $tip2, $tip3 ] ) # Array of tips | ||
173 : | # @path = path_to_node( $node, $tip1 ) # Use path_to_tip | ||
174 : | # @path = path_to_node( $node, [ $tip1 ] ) # Use path_to_tip | ||
175 : | # | ||
176 : | # $distance = newick_path_length( @path ) | ||
177 : | # $distance = tip_to_tip_distance( $tree, $tip1, $tip2 ) | ||
178 : | # $distance = node_to_node_distance( $tree, $node1, $node2 ) | ||
179 : | # | ||
180 : | # | ||
181 : | #=============================================================================== | ||
182 : | # Tree manipulations: | ||
183 : | #=============================================================================== | ||
184 : | # | ||
185 : | overbeek | 1.4 | # $treecopy = copy_newick_tree( $tree ) |
186 : | golsen | 1.1 | # |
187 : | #------------------------------------------------------------------------------- | ||
188 : | golsen | 1.8 | # The following modify the existing tree, and possibly any components of that |
189 : | golsen | 1.1 | # tree that are reached by reference. If the old version is still needed, copy |
190 : | # before modifying. | ||
191 : | #------------------------------------------------------------------------------- | ||
192 : | # | ||
193 : | # Modify labels: | ||
194 : | # | ||
195 : | # $newtree = newick_relabel_nodes( $node, \%new_name ) | ||
196 : | # $newtree = newick_relabel_nodes_i( $node, \%new_name ) | ||
197 : | # $newtree = newick_relabel_tips( $node, \%new_name ) | ||
198 : | # $newtree = newick_relabel_tips_i( $node, \%new_name ) | ||
199 : | # | ||
200 : | # Modify branches: | ||
201 : | # | ||
202 : | # $n_changed = newick_set_undefined_branches( $node, $x ) | ||
203 : | # $n_changed = newick_set_all_branches( $node, $x ) | ||
204 : | golsen | 1.5 | # $n_changed = newick_fix_negative_branches( $tree ) |
205 : | overbeek | 1.7 | # $node = newick_rescale_branches( $node, $factor ) |
206 : | golsen | 1.1 | # |
207 : | golsen | 1.8 | # Modify comments: |
208 : | # | ||
209 : | # $node = newick_strip_comments( $node ) | ||
210 : | # | ||
211 : | golsen | 1.1 | # Modify rooting and/or order: |
212 : | # | ||
213 : | # $nrmtree = normalize_newick_tree( $tree ) | ||
214 : | # $revtree = reverse_newick_tree( $tree ) | ||
215 : | # $stdtree = std_unrooted_newick( $tree ) | ||
216 : | # $newtree = aesthetic_newick_tree( $tree, $direction ) | ||
217 : | # $rndtree = random_order_newick_tree( $tree ) | ||
218 : | # $newtree = reroot_newick_by_path( @path ) | ||
219 : | # $newtree = reroot_newick_to_tip( $tree, $tip ) | ||
220 : | # $newtree = reroot_newick_next_to_tip( $tree, $tip ) | ||
221 : | # $newtree = reroot_newick_to_node( $tree, @node ) | ||
222 : | # $newtree = reroot_newick_to_node_ref( $tree, $noderef ) | ||
223 : | golsen | 1.9 | # $newtree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction ) |
224 : | golsen | 1.12 | # $newtree = reroot_newick_to_midpoint( $tree ) # unweighted |
225 : | # $newtree = reroot_newick_to_midpoint_w( $tree ) # weight by tips | ||
226 : | golsen | 1.5 | # $newtree = reroot_newick_to_approx_midpoint( $tree ) # unweighted |
227 : | # $newtree = reroot_newick_to_approx_midpoint_w( $tree ) # weight by tips | ||
228 : | golsen | 1.1 | # $newtree = uproot_tip_rooted_newick( $tree ) |
229 : | # $newtree = uproot_newick( $tree ) | ||
230 : | # | ||
231 : | # $newtree = prune_from_newick( $tree, $tip ) | ||
232 : | golsen | 1.12 | # $newtree = rooted_newick_subtree( $tree, @tips ) |
233 : | # $newtree = rooted_newick_subtree( $tree, \@tips ) | ||
234 : | golsen | 1.1 | # $newtree = newick_subtree( $tree, @tips ) |
235 : | # $newtree = newick_subtree( $tree, \@tips ) | ||
236 : | golsen | 1.12 | # $newtree = newick_covering_subtree( $tree, @tips ) |
237 : | # $newtree = newick_covering_subtree( $tree, \@tips ) | ||
238 : | golsen | 1.1 | # |
239 : | golsen | 1.5 | # $newtree = collapse_zero_length_branches( $tree ) |
240 : | # | ||
241 : | golsen | 1.8 | # $node = newick_insert_at_node( $node, $subtree ) |
242 : | # $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction ) | ||
243 : | # | ||
244 : | golsen | 1.1 | #=============================================================================== |
245 : | golsen | 1.9 | # Tree neighborhood: subtree of n tips to represent a larger tree. |
246 : | #=============================================================================== | ||
247 : | # | ||
248 : | # Focus around root: | ||
249 : | # | ||
250 : | # $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority ) | ||
251 : | # $subtree = root_neighborhood_representative_tree( $tree, $n ) | ||
252 : | # @tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority ) | ||
253 : | # @tips = root_neighborhood_representative_tips( $tree, $n ) | ||
254 : | # \@tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority ) | ||
255 : | # \@tips = root_neighborhood_representative_tips( $tree, $n ) | ||
256 : | # | ||
257 : | # Focus around a tip insertion point (the tip is not in the subtree): | ||
258 : | # | ||
259 : | # $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority ) | ||
260 : | # $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n ) | ||
261 : | # @tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority ) | ||
262 : | # @tips = tip_neighborhood_representative_tips( $tree, $tip, $n ) | ||
263 : | # \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority ) | ||
264 : | # \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n ) | ||
265 : | # | ||
266 : | #=============================================================================== | ||
267 : | golsen | 1.1 | # Tree reading and writing: |
268 : | #=============================================================================== | ||
269 : | golsen | 1.9 | # Write machine-readable trees: |
270 : | golsen | 1.1 | # |
271 : | # writeNewickTree( $tree ) | ||
272 : | # writeNewickTree( $tree, $file ) | ||
273 : | overbeek | 1.7 | # writeNewickTree( $tree, \*FH ) |
274 : | # fwriteNewickTree( $file, $tree ) # Matches the C arg list for f... I/O | ||
275 : | golsen | 1.1 | # $treestring = swriteNewickTree( $tree ) |
276 : | # $treestring = formatNewickTree( $tree ) | ||
277 : | golsen | 1.9 | # |
278 : | # Write human-readable trees: | ||
279 : | # | ||
280 : | golsen | 1.1 | # @textlines = text_plot_newick( $node, $width, $min_dx, $dy ) |
281 : | # printer_plot_newick( $node, $file, $width, $min_dx, $dy ) | ||
282 : | # | ||
283 : | golsen | 1.9 | # Read trees: |
284 : | # | ||
285 : | overbeek | 1.7 | # $tree = read_newick_tree( $file ) # reads to a semicolon |
286 : | # @trees = read_newick_trees( $file ) # reads to end of file | ||
287 : | # $tree = parse_newick_tree_str( $string ) | ||
288 : | golsen | 1.1 | # |
289 : | #=============================================================================== | ||
290 : | |||
291 : | |||
292 : | golsen | 1.8 | use Carp; |
293 : | use Data::Dumper; | ||
294 : | golsen | 1.9 | use strict; |
295 : | golsen | 1.8 | |
296 : | golsen | 1.1 | require Exporter; |
297 : | |||
298 : | our @ISA = qw(Exporter); | ||
299 : | our @EXPORT = qw( | ||
300 : | golsen | 1.9 | is_overbeek_tree |
301 : | is_gjonewick_tree | ||
302 : | overbeek | 1.7 | overbeek_to_gjonewick |
303 : | gjonewick_to_overbeek | ||
304 : | golsen | 1.8 | newick_is_valid |
305 : | golsen | 1.1 | newick_is_rooted |
306 : | newick_is_unrooted | ||
307 : | tree_rooted_on_tip | ||
308 : | newick_is_bifurcating | ||
309 : | newick_tip_count | ||
310 : | golsen | 1.9 | newick_tip_ref_list |
311 : | golsen | 1.1 | newick_tip_list |
312 : | golsen | 1.9 | |
313 : | golsen | 1.1 | newick_first_tip |
314 : | newick_duplicated_tips | ||
315 : | newick_tip_in_tree | ||
316 : | newick_shared_tips | ||
317 : | |||
318 : | newick_tree_length | ||
319 : | golsen | 1.9 | newick_tip_distances |
320 : | golsen | 1.1 | newick_max_X |
321 : | newick_most_distant_tip_ref | ||
322 : | newick_most_distant_tip_name | ||
323 : | golsen | 1.9 | |
324 : | newick_tip_insertion_point | ||
325 : | |||
326 : | golsen | 1.1 | std_newick_name |
327 : | |||
328 : | path_to_tip | ||
329 : | path_to_named_node | ||
330 : | path_to_node_ref | ||
331 : | path_to_node | ||
332 : | |||
333 : | newick_path_length | ||
334 : | tip_to_tip_distance | ||
335 : | node_to_node_distance | ||
336 : | |||
337 : | overbeek | 1.4 | copy_newick_tree |
338 : | golsen | 1.1 | |
339 : | newick_relabel_nodes | ||
340 : | newick_relabel_nodes_i | ||
341 : | newick_relabel_tips | ||
342 : | newick_relabel_tips_i | ||
343 : | |||
344 : | newick_set_undefined_branches | ||
345 : | newick_set_all_branches | ||
346 : | golsen | 1.5 | newick_fix_negative_branches |
347 : | overbeek | 1.7 | newick_rescale_branches |
348 : | golsen | 1.1 | |
349 : | golsen | 1.8 | newick_strip_comments |
350 : | |||
351 : | golsen | 1.1 | normalize_newick_tree |
352 : | reverse_newick_tree | ||
353 : | std_unrooted_newick | ||
354 : | aesthetic_newick_tree | ||
355 : | unaesthetic_newick_tree | ||
356 : | random_order_newick_tree | ||
357 : | |||
358 : | reroot_newick_by_path | ||
359 : | reroot_newick_to_tip | ||
360 : | reroot_newick_next_to_tip | ||
361 : | reroot_newick_to_node | ||
362 : | reroot_newick_to_node_ref | ||
363 : | golsen | 1.9 | reroot_newick_between_nodes |
364 : | golsen | 1.12 | reroot_newick_to_midpoint |
365 : | reroot_newick_to_midpoint_w | ||
366 : | golsen | 1.1 | reroot_newick_to_approx_midpoint |
367 : | golsen | 1.5 | reroot_newick_to_approx_midpoint_w |
368 : | golsen | 1.1 | uproot_tip_rooted_newick |
369 : | uproot_newick | ||
370 : | |||
371 : | prune_from_newick | ||
372 : | golsen | 1.12 | rooted_newick_subtree |
373 : | golsen | 1.1 | newick_subtree |
374 : | golsen | 1.12 | newick_covering_subtree |
375 : | golsen | 1.5 | collapse_zero_length_branches |
376 : | golsen | 1.1 | |
377 : | golsen | 1.8 | newick_insert_at_node |
378 : | newick_insert_between_nodes | ||
379 : | |||
380 : | golsen | 1.9 | root_neighborhood_representative_tree |
381 : | root_neighborhood_representative_tips | ||
382 : | tip_neighborhood_representative_tree | ||
383 : | tip_neighborhood_representative_tips | ||
384 : | |||
385 : | golsen | 1.1 | writeNewickTree |
386 : | fwriteNewickTree | ||
387 : | strNewickTree | ||
388 : | formatNewickTree | ||
389 : | overbeek | 1.7 | |
390 : | read_newick_tree | ||
391 : | read_newick_trees | ||
392 : | golsen | 1.1 | parse_newick_tree_str |
393 : | |||
394 : | printer_plot_newick | ||
395 : | text_plot_newick | ||
396 : | ); | ||
397 : | |||
398 : | our @EXPORT_OK = qw( | ||
399 : | newick_desc_ref | ||
400 : | newick_lbl | ||
401 : | newick_x | ||
402 : | newick_c1 | ||
403 : | newick_c2 | ||
404 : | newick_c3 | ||
405 : | newick_c4 | ||
406 : | newick_c5 | ||
407 : | newick_desc_list | ||
408 : | newick_n_desc | ||
409 : | newick_desc_i | ||
410 : | newick_is_tip | ||
411 : | newick_is_valid | ||
412 : | |||
413 : | set_newick_desc_ref | ||
414 : | set_newick_lbl | ||
415 : | set_newick_x | ||
416 : | set_newick_c1 | ||
417 : | set_newick_c2 | ||
418 : | set_newick_c3 | ||
419 : | set_newick_c4 | ||
420 : | set_newick_c5 | ||
421 : | |||
422 : | set_newick_desc_list | ||
423 : | set_newick_desc_i | ||
424 : | |||
425 : | add_to_newick_branch | ||
426 : | dump_tree | ||
427 : | ); | ||
428 : | |||
429 : | |||
430 : | #------------------------------------------------------------------------------- | ||
431 : | # Internally used definitions | ||
432 : | #------------------------------------------------------------------------------- | ||
433 : | |||
434 : | sub array_ref { ref( $_[0] ) eq "ARRAY" } | ||
435 : | sub hash_ref { ref( $_[0] ) eq "HASH" } | ||
436 : | |||
437 : | |||
438 : | #=============================================================================== | ||
439 : | golsen | 1.9 | # Interconvert overbeek and gjonewick trees: |
440 : | overbeek | 1.7 | #=============================================================================== |
441 : | |||
442 : | golsen | 1.9 | sub is_overbeek_tree { array_ref( $_[0] ) && array_ref( $_[0]->[2] ) } |
443 : | |||
444 : | sub is_gjonewick_tree { array_ref( $_[0] ) && array_ref( $_[0]->[0] ) } | ||
445 : | |||
446 : | overbeek | 1.7 | sub overbeek_to_gjonewick |
447 : | { | ||
448 : | return () unless ref( $_[0] ) eq 'ARRAY'; | ||
449 : | my ( $lbl, $x, $desc ) = @{ $_[0] }; | ||
450 : | my ( undef, @desc ) = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : (); | ||
451 : | [ [ map { overbeek_to_gjonewick( $_ ) } @desc ], $lbl, $x ] | ||
452 : | } | ||
453 : | |||
454 : | sub gjonewick_to_overbeek | ||
455 : | { | ||
456 : | return () unless ref( $_[0] ) eq 'ARRAY'; | ||
457 : | my ( $desc, $lbl, $x ) = @{ $_[0] }; | ||
458 : | my @desc = ( $desc && ref( $desc ) eq 'ARRAY' ) ? @$desc : (); | ||
459 : | my $parent = $_[1]; | ||
460 : | my $node = [ $lbl, $x, undef, [] ]; | ||
461 : | $node->[2] = [ $parent, map { gjonewick_to_overbeek( $_, $node ) } @desc ]; | ||
462 : | return $node; | ||
463 : | } | ||
464 : | |||
465 : | golsen | 1.9 | |
466 : | overbeek | 1.7 | #=============================================================================== |
467 : | golsen | 1.1 | # Extract tree structure values: |
468 : | #=============================================================================== | ||
469 : | # | ||
470 : | # $listref = newick_desc_ref( $noderef ) | ||
471 : | # $string = newick_lbl( $noderef ) | ||
472 : | # $real = newick_x( $noderef ) | ||
473 : | # $listref = newick_c1( $noderef ) | ||
474 : | # $listref = newick_c2( $noderef ) | ||
475 : | # $listref = newick_c3( $noderef ) | ||
476 : | # $listref = newick_c4( $noderef ) | ||
477 : | # $listref = newick_c5( $noderef ) | ||
478 : | # @list = newick_desc_list( $noderef ) | ||
479 : | # $int = newick_n_desc( $noderef ) | ||
480 : | # $listref = newick_desc_i( $noderef ) | ||
481 : | # $bool = node_is_tip( $noderef ) | ||
482 : | # $bool = node_is_valid( $noderef ) | ||
483 : | # | ||
484 : | #------------------------------------------------------------------------------- | ||
485 : | |||
486 : | golsen | 1.12 | sub newick_desc_ref { ref($_[0]) ? $_[0]->[0] : Carp::confess() } # = ${$_[0]}[0] |
487 : | golsen | 1.8 | sub newick_lbl { ref($_[0]) ? $_[0]->[1] : Carp::confess() } |
488 : | golsen | 1.12 | sub newick_x { ref($_[0]) ? $_[0]->[2] : Carp::confess() } |
489 : | golsen | 1.10 | sub newick_c1 { ref($_[0]) ? $_[0]->[3] : Carp::confess() } |
490 : | golsen | 1.12 | sub newick_c2 { ref($_[0]) ? $_[0]->[4] : Carp::confess() } |
491 : | sub newick_c3 { ref($_[0]) ? $_[0]->[5] : Carp::confess() } | ||
492 : | sub newick_c4 { ref($_[0]) ? $_[0]->[6] : Carp::confess() } | ||
493 : | sub newick_c5 { ref($_[0]) ? $_[0]->[7] : Carp::confess() } | ||
494 : | golsen | 1.1 | |
495 : | sub newick_desc_list { | ||
496 : | my $node = $_[0]; | ||
497 : | ! array_ref( $node ) ? undef : | ||
498 : | array_ref( $node->[0] ) ? @{ $node->[0] } : | ||
499 : | () ; | ||
500 : | } | ||
501 : | |||
502 : | sub newick_n_desc { | ||
503 : | my $node = $_[0]; | ||
504 : | ! array_ref( $node ) ? undef : | ||
505 : | array_ref( $node->[0] ) ? scalar @{ $node->[0] } : | ||
506 : | 0 ; | ||
507 : | } | ||
508 : | |||
509 : | sub newick_desc_i { | ||
510 : | my ( $node, $i ) = @_; | ||
511 : | ! array_ref( $node ) ? undef : | ||
512 : | array_ref( $node->[0] ) ? $node->[0]->[$i-1] : | ||
513 : | undef ; | ||
514 : | } | ||
515 : | |||
516 : | sub node_is_tip { | ||
517 : | my $node = $_[0]; | ||
518 : | ! array_ref( $node ) ? undef : # Not a node ref | ||
519 : | array_ref( $node->[0] ) ? @{ $node->[0] } == 0 : # Empty descend list? | ||
520 : | 1 ; # No descend list | ||
521 : | } | ||
522 : | |||
523 : | sub node_is_valid { # An array ref with nonempty descend list or a label | ||
524 : | my $node = $_[0]; | ||
525 : | array_ref( $node ) && ( array_ref( $node->[0] ) && @{ $node->[0] } | ||
526 : | || defined( $node->[1] ) | ||
527 : | ) | ||
528 : | } | ||
529 : | |||
530 : | |||
531 : | #------------------------------------------------------------------------------- | ||
532 : | # Set tree structure values | ||
533 : | #------------------------------------------------------------------------------- | ||
534 : | |||
535 : | sub set_newick_desc_ref { $_[0]->[0] = $_[1] } | ||
536 : | sub set_newick_lbl { $_[0]->[1] = $_[1] } | ||
537 : | sub set_newick_x { $_[0]->[2] = $_[1] } | ||
538 : | sub set_newick_c1 { $_[0]->[3] = $_[1] } | ||
539 : | sub set_newick_c2 { $_[0]->[4] = $_[1] } | ||
540 : | sub set_newick_c3 { $_[0]->[5] = $_[1] } | ||
541 : | sub set_newick_c4 { $_[0]->[6] = $_[1] } | ||
542 : | sub set_newick_c5 { $_[0]->[7] = $_[1] } | ||
543 : | |||
544 : | sub set_newick_desc_list { | ||
545 : | my $node = shift; | ||
546 : | array_ref( $node ) || return; | ||
547 : | if ( array_ref( $node->[0] ) ) { @{ $node->[0] } = @_ } | ||
548 : | else { $node->[0] = [ @_ ] } | ||
549 : | } | ||
550 : | |||
551 : | sub set_newick_desc_i { | ||
552 : | my ( $node1, $i, $node2 ) = @_; | ||
553 : | array_ref( $node1 ) && array_ref( $node2 ) || return; | ||
554 : | if ( array_ref( $node1->[0] ) ) { $node1->[0]->[$i-1] = $node2 } | ||
555 : | else { $node1->[0] = [ $node2 ] } | ||
556 : | } | ||
557 : | |||
558 : | |||
559 : | #=============================================================================== | ||
560 : | # Some tree property tests: | ||
561 : | #=============================================================================== | ||
562 : | golsen | 1.8 | # Tree is valid? |
563 : | # | ||
564 : | # $bool = newick_is_valid( $node, $verbose ) | ||
565 : | #------------------------------------------------------------------------------- | ||
566 : | sub newick_is_valid | ||
567 : | { | ||
568 : | my $node = shift; | ||
569 : | |||
570 : | if ( ! array_ref( $node ) ) | ||
571 : | { | ||
572 : | print STDERR "Node is not array reference\n" if $_[0]; | ||
573 : | return 0; | ||
574 : | } | ||
575 : | |||
576 : | my @node = @$node; | ||
577 : | if ( ! @node ) | ||
578 : | { | ||
579 : | print STDERR "Node is empty array reference\n" if $_[0]; | ||
580 : | return 0; | ||
581 : | } | ||
582 : | |||
583 : | # Must have descendant or label: | ||
584 : | |||
585 : | if ( ! ( array_ref( $node[0] ) && @{ $node[0] } ) && ! $node[2] ) | ||
586 : | { | ||
587 : | print STDERR "Node has neither descendant nor label\n" if $_[0]; | ||
588 : | return 0; | ||
589 : | } | ||
590 : | |||
591 : | # If comments are present, they must be array references | ||
592 : | |||
593 : | foreach ( ( @node > 3 ) ? @node[ 3 .. $#node ] : () ) | ||
594 : | { | ||
595 : | if ( defined( $_ ) && ! array_ref( $_ ) ) | ||
596 : | { | ||
597 : | print STDERR "Node has neither descendant or label\n" if $_[0]; | ||
598 : | return 0; | ||
599 : | } | ||
600 : | } | ||
601 : | |||
602 : | # Inspect the descendants: | ||
603 : | |||
604 : | foreach ( array_ref( $node[0] ) ? @{ $node[0] } : () ) | ||
605 : | { | ||
606 : | newick_is_valid( $_, @_ ) || return 0 | ||
607 : | } | ||
608 : | |||
609 : | return 1; | ||
610 : | } | ||
611 : | |||
612 : | |||
613 : | #------------------------------------------------------------------------------- | ||
614 : | golsen | 1.1 | # Tree is rooted (2 branches at root node)? |
615 : | # | ||
616 : | # $bool = newick_is_rooted( $node ) | ||
617 : | #------------------------------------------------------------------------------- | ||
618 : | sub newick_is_rooted { | ||
619 : | my $node = $_[0]; | ||
620 : | ! array_ref( $node ) ? undef : # Not a node ref | ||
621 : | array_ref( $node->[0] ) ? @{ $node->[0] } == 2 : # 2 branches | ||
622 : | 0 ; # No descend list | ||
623 : | } | ||
624 : | |||
625 : | |||
626 : | #------------------------------------------------------------------------------- | ||
627 : | # Tree is unrooted (> 2 branches at root node)? | ||
628 : | # | ||
629 : | # $bool = newick_is_unrooted( $node ) | ||
630 : | #------------------------------------------------------------------------------- | ||
631 : | sub newick_is_unrooted { | ||
632 : | my $node = $_[0]; | ||
633 : | ! array_ref( $node ) ? undef : # Not a node ref | ||
634 : | array_ref( $node->[0] ) ? @{ $node->[0] } >= 3 : # Over 2 branches | ||
635 : | 0 ; # No descend list | ||
636 : | } | ||
637 : | |||
638 : | |||
639 : | #------------------------------------------------------------------------------- | ||
640 : | # Tree is rooted on a tip (1 branch at root node)? | ||
641 : | # | ||
642 : | # $bool = newick_is_tip_rooted( $node ) | ||
643 : | #------------------------------------------------------------------------------- | ||
644 : | sub newick_is_tip_rooted { | ||
645 : | my $node = $_[0]; | ||
646 : | ! array_ref( $node ) ? undef : # Not a node ref | ||
647 : | array_ref( $node->[0] ) ? @{ $node->[0] } == 1 : # 1 branch | ||
648 : | 0 ; # No descend list | ||
649 : | } | ||
650 : | |||
651 : | #=============================================================================== | ||
652 : | # Everything below this point refers to parts of the tree structure using | ||
653 : | # only the routines above. | ||
654 : | #=============================================================================== | ||
655 : | # Tree is bifurcating? If so, return number of descendents of root node. | ||
656 : | # | ||
657 : | # $n_desc = newick_is_bifurcating( $node ) | ||
658 : | #------------------------------------------------------------------------------- | ||
659 : | sub newick_is_bifurcating { | ||
660 : | my ( $node, $not_root ) = @_; | ||
661 : | if ( ! array_ref( $node ) ) { return undef } # Bad arg | ||
662 : | |||
663 : | my $n = newick_n_desc( $node ); | ||
664 : | $n == 0 && ! $not_root ? 0 : | ||
665 : | $n == 1 && $not_root ? 0 : | ||
666 : | $n == 3 && $not_root ? 0 : | ||
667 : | $n > 3 ? 0 : | ||
668 : | $n > 2 && ! newick_is_bifurcating( newick_desc_i( $node, 3, 1 ) ) ? 0 : | ||
669 : | $n > 1 && ! newick_is_bifurcating( newick_desc_i( $node, 2, 1 ) ) ? 0 : | ||
670 : | $n > 0 && ! newick_is_bifurcating( newick_desc_i( $node, 1, 1 ) ) ? 0 : | ||
671 : | $n | ||
672 : | } | ||
673 : | |||
674 : | |||
675 : | #------------------------------------------------------------------------------- | ||
676 : | # Number of tips: | ||
677 : | # | ||
678 : | # $n = newick_tip_count( $node ) | ||
679 : | #------------------------------------------------------------------------------- | ||
680 : | sub newick_tip_count { | ||
681 : | my ( $node, $not_root ) = @_; | ||
682 : | |||
683 : | my $imax = newick_n_desc( $node ); | ||
684 : | if ( $imax < 1 ) { return 1 } | ||
685 : | |||
686 : | # Special case for tree rooted on tip | ||
687 : | |||
688 : | my $n = ( $imax == 1 && ( ! $not_root ) ) ? 1 : 0; | ||
689 : | |||
690 : | foreach ( newick_desc_list( $node ) ) { $n += newick_tip_count( $_, 1 ) } | ||
691 : | |||
692 : | $n; | ||
693 : | } | ||
694 : | |||
695 : | |||
696 : | #------------------------------------------------------------------------------- | ||
697 : | # List of tip nodes: | ||
698 : | # | ||
699 : | golsen | 1.9 | # @tips = newick_tip_ref_list( $noderef ) |
700 : | # \@tips = newick_tip_ref_list( $noderef ) | ||
701 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
702 : | sub newick_tip_ref_list { | ||
703 : | my ( $node, $not_root ) = @_; | ||
704 : | |||
705 : | my $imax = newick_n_desc( $node ); | ||
706 : | if ( $imax < 1 ) { return $node } | ||
707 : | |||
708 : | my @list = (); | ||
709 : | |||
710 : | # Tree rooted on tip? | ||
711 : | if ( $imax == 1 && ! $not_root && newick_lbl( $node ) ) { push @list, $node } | ||
712 : | |||
713 : | foreach ( newick_desc_list( $node ) ) { | ||
714 : | push @list, newick_tip_ref_list( $_, 1 ); | ||
715 : | } | ||
716 : | |||
717 : | golsen | 1.9 | wantarray ? @list : \@list; |
718 : | golsen | 1.1 | } |
719 : | |||
720 : | |||
721 : | #------------------------------------------------------------------------------- | ||
722 : | # List of tips: | ||
723 : | # | ||
724 : | # @tips = newick_tip_list( $node ) | ||
725 : | golsen | 1.9 | # \@tips = newick_tip_list( $node ) |
726 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
727 : | sub newick_tip_list { | ||
728 : | golsen | 1.9 | my @tips = map { newick_lbl( $_ ) } newick_tip_ref_list( $_[0] ); |
729 : | wantarray ? @tips : \@tips; | ||
730 : | golsen | 1.1 | } |
731 : | |||
732 : | |||
733 : | #------------------------------------------------------------------------------- | ||
734 : | # First tip node in tree: | ||
735 : | # | ||
736 : | # $tipref = newick_first_tip_ref( $node ) | ||
737 : | #------------------------------------------------------------------------------- | ||
738 : | sub newick_first_tip_ref { | ||
739 : | my ( $node, $not_root ) = @_; | ||
740 : | valid_node( $node ) || return undef; | ||
741 : | |||
742 : | # Arrived at tip, or start of a tip-rooted tree? | ||
743 : | my $n = newick_n_desc( $node ); | ||
744 : | if ( ( $n < 1 ) || ( $n == 1 && ! $not_root ) ) { return $node } | ||
745 : | |||
746 : | newick_first_tip_ref( newick_desc_i( $node, 1 ), 1 ); | ||
747 : | } | ||
748 : | |||
749 : | |||
750 : | #------------------------------------------------------------------------------- | ||
751 : | # First tip name in tree: | ||
752 : | # | ||
753 : | # $tip = newick_first_tip( $node ) | ||
754 : | #------------------------------------------------------------------------------- | ||
755 : | sub newick_first_tip { | ||
756 : | my ( $noderef ) = @_; | ||
757 : | |||
758 : | my $tipref; | ||
759 : | array_ref( $tipref = newick_first_tip_ref( $noderef ) ) ? newick_lbl( $tipref ) | ||
760 : | golsen | 1.9 | : undef; |
761 : | golsen | 1.1 | } |
762 : | |||
763 : | |||
764 : | #------------------------------------------------------------------------------- | ||
765 : | # List of duplicated tip labels. | ||
766 : | # | ||
767 : | # @tips = newick_duplicated_tips( $node ) | ||
768 : | golsen | 1.9 | # \@tips = newick_duplicated_tips( $node ) |
769 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
770 : | sub newick_duplicated_tips { | ||
771 : | golsen | 1.9 | my @tips = &duplicates( newick_tip_list( $_[0] ) ); |
772 : | wantarray ? @tips : \@tips; | ||
773 : | golsen | 1.1 | } |
774 : | |||
775 : | |||
776 : | #------------------------------------------------------------------------------- | ||
777 : | # Tip in tree? | ||
778 : | # | ||
779 : | # $bool = newick_tip_in_tree( $node, $tipname ) | ||
780 : | #------------------------------------------------------------------------------- | ||
781 : | sub newick_tip_in_tree { | ||
782 : | my ( $node, $tip, $not_root ) = @_; | ||
783 : | |||
784 : | my $n = newick_n_desc( $node ); | ||
785 : | if ( $n < 1 ) { return ( newick_lbl( $node ) eq $tip) ? 1 : 0 } | ||
786 : | |||
787 : | # Special case for tree rooted on tip | ||
788 : | |||
789 : | if ( $n == 1 && ( ! $not_root ) && newick_lbl( $node ) eq $tip ) { return 1 } | ||
790 : | |||
791 : | foreach ( newick_desc_list( $node ) ) { | ||
792 : | if ( newick_tip_in_tree( $_, $tip, 1 ) ) { return 1 } | ||
793 : | } | ||
794 : | |||
795 : | 0; # Fall through means not found | ||
796 : | } | ||
797 : | |||
798 : | |||
799 : | #------------------------------------------------------------------------------- | ||
800 : | # Tips shared between 2 trees. | ||
801 : | # | ||
802 : | # @tips = newick_shared_tips( $tree1, $tree2 ) | ||
803 : | golsen | 1.9 | # \@tips = newick_shared_tips( $tree1, $tree2 ) |
804 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
805 : | sub newick_shared_tips { | ||
806 : | golsen | 1.9 | my ( $tree1, $tree2 ) = @_; |
807 : | my $tips1 = newick_tip_list( $tree1 ); | ||
808 : | my $tips2 = newick_tip_list( $tree2 ); | ||
809 : | my @tips = &intersection( $tips1, $tips2 ); | ||
810 : | wantarray ? @tips : \@tips; | ||
811 : | golsen | 1.1 | } |
812 : | |||
813 : | |||
814 : | #------------------------------------------------------------------------------- | ||
815 : | # Tree length. | ||
816 : | # | ||
817 : | # $length = newick_tree_length( $node ) | ||
818 : | #------------------------------------------------------------------------------- | ||
819 : | sub newick_tree_length { | ||
820 : | my ( $node, $not_root ) = @_; | ||
821 : | |||
822 : | my $x = $not_root ? newick_x( $node ) : 0; | ||
823 : | defined( $x ) || ( $x = 1 ); # Convert undefined to 1 | ||
824 : | |||
825 : | foreach ( newick_desc_list( $node ) ) { $x += newick_tree_length( $_, 1 ) } | ||
826 : | |||
827 : | $x; | ||
828 : | } | ||
829 : | |||
830 : | |||
831 : | #------------------------------------------------------------------------------- | ||
832 : | golsen | 1.9 | # Hash of tip nodes and corresponding distances from root: |
833 : | # | ||
834 : | # %tip_distances = newick_tip_distances( $node ) | ||
835 : | # \%tip_distances = newick_tip_distances( $node ) | ||
836 : | #------------------------------------------------------------------------------- | ||
837 : | sub newick_tip_distances | ||
838 : | { | ||
839 : | my ( $node, $x, $hash ) = @_; | ||
840 : | my $root = ! $hash; | ||
841 : | ref( $hash ) eq 'HASH' or $hash = {}; | ||
842 : | |||
843 : | $x ||= 0; | ||
844 : | $x += newick_x( $node ) || 0; | ||
845 : | |||
846 : | # Is it a tip? | ||
847 : | |||
848 : | my $n_desc = newick_n_desc( $node ); | ||
849 : | if ( ! $n_desc ) | ||
850 : | { | ||
851 : | $hash->{ newick_lbl( $node ) } = $x; | ||
852 : | return $hash; | ||
853 : | } | ||
854 : | |||
855 : | # Tree rooted on tip? | ||
856 : | |||
857 : | if ( ( $n_desc == 1 ) && $root && ( newick_lbl( $node ) ) ) | ||
858 : | { | ||
859 : | $hash->{ newick_lbl( $node ) } = 0; # Distance to root is zero | ||
860 : | } | ||
861 : | |||
862 : | foreach ( newick_desc_list( $node ) ) | ||
863 : | { | ||
864 : | newick_tip_distances( $_, $x, $hash ); | ||
865 : | } | ||
866 : | |||
867 : | wantarray ? %$hash : $hash; | ||
868 : | } | ||
869 : | |||
870 : | |||
871 : | #------------------------------------------------------------------------------- | ||
872 : | golsen | 1.1 | # Tree max X. |
873 : | # | ||
874 : | # $xmax = newick_max_X( $node ) | ||
875 : | #------------------------------------------------------------------------------- | ||
876 : | sub newick_max_X { | ||
877 : | my ( $node, $not_root ) = @_; | ||
878 : | |||
879 : | my $xmax = 0; | ||
880 : | foreach ( newick_desc_list( $node ) ) { | ||
881 : | my $x = newick_max_X( $_, 1 ); | ||
882 : | if ( $x > $xmax ) { $xmax = $x } | ||
883 : | } | ||
884 : | |||
885 : | my $x = $not_root ? newick_x( $node ) : 0; | ||
886 : | $xmax + ( defined( $x ) ? $x : 1 ); # Convert undefined to 1 | ||
887 : | } | ||
888 : | |||
889 : | |||
890 : | #------------------------------------------------------------------------------- | ||
891 : | # Most distant tip from root: distance and path. | ||
892 : | # | ||
893 : | # ( $xmax, @path ) = newick_most_distant_tip_path( $tree ) | ||
894 : | #------------------------------------------------------------------------------- | ||
895 : | sub newick_most_distant_tip_path { | ||
896 : | my ( $node, $not_root ) = @_; | ||
897 : | |||
898 : | my $imax = newick_n_desc( $node ); | ||
899 : | my $xmax = ( $imax > 0 ) ? -1 : 0; | ||
900 : | my @pmax = (); | ||
901 : | for ( my $i = 1; $i <= $imax; $i++ ) { | ||
902 : | my ( $x, @path ) = newick_most_distant_tip_path( newick_desc_i( $node, $i ), 1 ); | ||
903 : | if ( $x > $xmax ) { $xmax = $x; @pmax = ( $i, @path ) } | ||
904 : | } | ||
905 : | |||
906 : | my $x = $not_root ? newick_x( $node ) : 0; | ||
907 : | $xmax += defined( $x ) ? $x : 0; # Convert undefined to 1 | ||
908 : | ( $xmax, $node, @pmax ); | ||
909 : | } | ||
910 : | |||
911 : | |||
912 : | #------------------------------------------------------------------------------- | ||
913 : | # Most distant tip from root, and its distance. | ||
914 : | # | ||
915 : | # ( $tipref, $xmax ) = newick_most_distant_tip_ref( $tree ) | ||
916 : | #------------------------------------------------------------------------------- | ||
917 : | sub newick_most_distant_tip_ref { | ||
918 : | my ( $node, $not_root ) = @_; | ||
919 : | |||
920 : | my $imax = newick_n_desc( $node ); | ||
921 : | my $xmax = ( $imax > 0 ) ? -1 : 0; | ||
922 : | my $tmax = $node; | ||
923 : | foreach ( newick_desc_list( $node ) ) { | ||
924 : | my ( $t, $x ) = newick_most_distant_tip_ref( $_, 1 ); | ||
925 : | if ( $x > $xmax ) { $xmax = $x; $tmax = $t } | ||
926 : | } | ||
927 : | |||
928 : | my $x = $not_root ? newick_x( $node ) : 0; | ||
929 : | $xmax += defined( $x ) ? $x : 1; # Convert undefined to 1 | ||
930 : | ( $tmax, $xmax ); | ||
931 : | } | ||
932 : | |||
933 : | |||
934 : | #------------------------------------------------------------------------------- | ||
935 : | # Name of most distant tip from root, and its distance. | ||
936 : | # | ||
937 : | # ( $tipname, $xmax ) = newick_most_distant_tip_name( $tree ) | ||
938 : | #------------------------------------------------------------------------------- | ||
939 : | sub newick_most_distant_tip_name { | ||
940 : | my ( $tipref, $xmax ) = newick_most_distant_tip_ref( $_[0] ); | ||
941 : | ( newick_lbl( $tipref ), $xmax ) | ||
942 : | } | ||
943 : | |||
944 : | |||
945 : | #------------------------------------------------------------------------------- | ||
946 : | golsen | 1.8 | # Tree tip insertion point (with standard node labels): |
947 : | # | ||
948 : | # [ $node1, $x1, $node2, $x2, $x ] | ||
949 : | # = newick_tip_insertion_point( $tree, $tip ) | ||
950 : | # | ||
951 : | # Which means: tip is on a branch of length x that is inserted into the branch | ||
952 : | # connecting node1 and node2, at distance x1 from node1 and x2 from node2. | ||
953 : | # | ||
954 : | # x1 +------ n1a (lowest sorting tip of this subtree) | ||
955 : | # +--------n1 | ||
956 : | # | +------n1b (lowest sorting tip of this subtree) | ||
957 : | # tip-------n | ||
958 : | # x | +------------- n2a (lowest sorting tip of this subtree) | ||
959 : | # +------n2 | ||
960 : | # x2 +-------- n2b (lowest sorting tip of this subtree) | ||
961 : | # | ||
962 : | # The designations of 1 vs 2, and a vs b are chosen such that: | ||
963 : | # n1a < n1b, and n2a < n2b, and n1a < n2a | ||
964 : | # | ||
965 : | # Then the statandard description becomes: | ||
966 : | # | ||
967 : | # [ [ $n1a, min(n1b,n2a), max(n1b,n2a) ], x1, | ||
968 : | # [ $n2a, min(n2b,n1a), max(n2b,n1a) ], x2, | ||
969 : | # x | ||
970 : | # ] | ||
971 : | # | ||
972 : | #------------------------------------------------------------------------------- | ||
973 : | sub newick_tip_insertion_point | ||
974 : | { | ||
975 : | my ( $tree, $tip ) = @_; | ||
976 : | $tree && $tip && ref( $tree ) eq 'ARRAY' or return undef; | ||
977 : | $tree = copy_newick_tree( $tree ) or return undef; | ||
978 : | $tree = reroot_newick_to_tip( $tree, $tip ) or return undef; | ||
979 : | my $node = $tree; | ||
980 : | |||
981 : | my $x = 0; # Distance to node | ||
982 : | my $dl = newick_desc_ref( $node ); # Descendent list of tip node; | ||
983 : | $node = $dl->[0]; # Node adjacent to tip | ||
984 : | $dl = newick_desc_ref( $node ); | ||
985 : | while ( $dl && ( @$dl == 1 ) ) # Traverse unbranched nodes | ||
986 : | { | ||
987 : | $node = $dl->[0]; | ||
988 : | $x += newick_x( $node ); | ||
989 : | $dl = newick_desc_ref( $node ); | ||
990 : | } | ||
991 : | $x += newick_x( $node ); | ||
992 : | |||
993 : | # We are now at the node that is the insertion point. | ||
994 : | # Is it a tip? | ||
995 : | |||
996 : | my @description; | ||
997 : | |||
998 : | if ( ( ! $dl ) || @$dl == 0 ) | ||
999 : | { | ||
1000 : | @description = ( [ newick_lbl( $node ) ], 0, undef, 0, $x ); | ||
1001 : | } | ||
1002 : | |||
1003 : | # Is it a trifurcation or greater, in which case it does not go | ||
1004 : | # away with tip deletion? | ||
1005 : | |||
1006 : | elsif ( @$dl > 2 ) | ||
1007 : | { | ||
1008 : | @description = ( [ std_node_name( $node, $node ) ], 0, undef, 0, $x ); | ||
1009 : | } | ||
1010 : | |||
1011 : | # The node is bifurcating. We need to describe it. | ||
1012 : | |||
1013 : | else | ||
1014 : | { | ||
1015 : | golsen | 1.9 | my ( $n1, $x1 ) = describe_descendant( $dl->[0] ); |
1016 : | my ( $n2, $x2 ) = describe_descendant( $dl->[1] ); | ||
1017 : | golsen | 1.8 | |
1018 : | if ( @$n1 == 2 ) { push @$n1, $n2->[0] } | ||
1019 : | if ( @$n2 == 2 ) | ||
1020 : | { | ||
1021 : | @$n2 = sort { lc $a cmp lc $b } ( @$n2, $n1->[0] ); | ||
1022 : | } | ||
1023 : | if ( @$n1 == 3 ) { @$n2 = sort { lc $a cmp lc $b } @$n2 } | ||
1024 : | @description = ( $n1, $x1, $n2, $x2, $x ); | ||
1025 : | } | ||
1026 : | |||
1027 : | return wantarray ? @description : \@description; | ||
1028 : | } | ||
1029 : | |||
1030 : | |||
1031 : | golsen | 1.9 | sub describe_descendant |
1032 : | golsen | 1.8 | { |
1033 : | my $node = shift; | ||
1034 : | |||
1035 : | my $x = 0; # Distance to node | ||
1036 : | my $dl = newick_desc_ref( $node ); # Descendent list of tip node; | ||
1037 : | while ( $dl && ( @$dl == 1 ) ) # Traverse unbranched nodes | ||
1038 : | { | ||
1039 : | $node = $dl->[0]; | ||
1040 : | golsen | 1.9 | $x += newick_x( $node ); |
1041 : | golsen | 1.8 | $dl = newick_desc_ref( $node ); |
1042 : | } | ||
1043 : | $x += newick_x( $node ); | ||
1044 : | |||
1045 : | # Is it a tip? Return list of one tip; | ||
1046 : | |||
1047 : | if ( ( ! $dl ) || @$dl == 0 ) | ||
1048 : | { | ||
1049 : | return ( [ newick_lbl( $node ) ], $x ); | ||
1050 : | } | ||
1051 : | |||
1052 : | # Get tips of each descendent, keeping lowest sorting from each. | ||
1053 : | # Return the two lowest of those (the third will come from the | ||
1054 : | # other side of the original node). | ||
1055 : | |||
1056 : | golsen | 1.9 | my @rep_tips = sort { lc $a cmp lc $b } |
1057 : | map { ( sort { lc $a cmp lc $b } newick_tip_list( $_ ) )[0] } | ||
1058 : | @$dl; | ||
1059 : | return ( [ @rep_tips[0,1] ], $x ); | ||
1060 : | golsen | 1.8 | } |
1061 : | |||
1062 : | |||
1063 : | #------------------------------------------------------------------------------- | ||
1064 : | golsen | 1.1 | # Standard node name: |
1065 : | # Tip label if at a tip | ||
1066 : | # Three sorted tip labels intersecting at node, each being smallest | ||
1067 : | # of all the tips of their subtrees | ||
1068 : | # | ||
1069 : | golsen | 1.9 | # @TipOrTips = std_node_name( $tree, $node ) |
1070 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
1071 : | sub std_node_name { | ||
1072 : | my $tree = $_[0]; | ||
1073 : | |||
1074 : | # Node reference is last element of path to node | ||
1075 : | |||
1076 : | my $noderef = ( path_to_node( @_ ) )[-1]; | ||
1077 : | defined( $noderef ) || return (); | ||
1078 : | |||
1079 : | if ( node_is_tip( $noderef ) || $noderef eq $tree ) { # Is it a tip? | ||
1080 : | return newick_lbl( $noderef ); | ||
1081 : | } | ||
1082 : | |||
1083 : | # Work through lists of tips in descendant subtrees, removing them from | ||
1084 : | # @rest, and keeping the best tip for each subtree. | ||
1085 : | |||
1086 : | golsen | 1.8 | my @rest = newick_tip_list( $tree ); |
1087 : | golsen | 1.9 | my @best = map |
1088 : | { | ||
1089 : | golsen | 1.8 | my @tips = sort { lc $a cmp lc $b } newick_tip_list( $_ ); |
1090 : | golsen | 1.9 | @rest = &set_difference( \@rest, \@tips ); |
1091 : | golsen | 1.1 | $tips[0]; |
1092 : | golsen | 1.9 | } newick_desc_list( $noderef ); |
1093 : | golsen | 1.1 | |
1094 : | # Best of the rest of the tree | ||
1095 : | push @best, ( sort { lc $a cmp lc $b } @rest )[0]; | ||
1096 : | |||
1097 : | # Take the top 3, in order: | ||
1098 : | |||
1099 : | ( @best >= 3 ) ? ( sort { lc $a cmp lc $b } @best )[0 .. 2] : (); | ||
1100 : | } | ||
1101 : | |||
1102 : | |||
1103 : | #=============================================================================== | ||
1104 : | # Functions to find paths in trees. | ||
1105 : | # | ||
1106 : | # Path descriptions are of form: | ||
1107 : | # ( $node0, $i0, $node1, $i1, $node2, $i2, ..., $nodeN ) # Always odd | ||
1108 : | # () is returned upon failure | ||
1109 : | # | ||
1110 : | # Numbering of descendants is 1-based. | ||
1111 : | #=============================================================================== | ||
1112 : | # Path to tip: | ||
1113 : | # | ||
1114 : | # @path = path_to_tip( $treenode, $tipname ) | ||
1115 : | #------------------------------------------------------------------------------- | ||
1116 : | sub path_to_tip { | ||
1117 : | my ( $node, $tip, @path0 ) = @_; | ||
1118 : | |||
1119 : | push @path0, $node; | ||
1120 : | my $imax = newick_n_desc( $node ); | ||
1121 : | if ( $imax < 1 ) { return ( newick_lbl( $node ) eq $tip ) ? @path0 : () } | ||
1122 : | |||
1123 : | # Special case for tree rooted on tip | ||
1124 : | |||
1125 : | if ( ( $imax == 1 ) # One descendant | ||
1126 : | && ( @path0 == 1 ) # First step in path | ||
1127 : | && ( newick_lbl( $node ) eq $tip ) # Label matches | ||
1128 : | ) { return @path0 } | ||
1129 : | |||
1130 : | my @path; | ||
1131 : | for (my $i = 1; $i <= $imax; $i++ ) { | ||
1132 : | @path = path_to_tip( newick_desc_i( $node, $i ), $tip, ( @path0, $i ) ); | ||
1133 : | if ( @path ) { return @path } | ||
1134 : | } | ||
1135 : | |||
1136 : | (); # Not found | ||
1137 : | } | ||
1138 : | |||
1139 : | |||
1140 : | #------------------------------------------------------------------------------- | ||
1141 : | # Path to named node. | ||
1142 : | # Like path to tip, but will find named internal nodes as well. | ||
1143 : | # | ||
1144 : | # @path = path_to_named_node( $treenode, $name ) | ||
1145 : | #------------------------------------------------------------------------------- | ||
1146 : | sub path_to_named_node { | ||
1147 : | my ( $node, $name, @path0 ) = @_; | ||
1148 : | |||
1149 : | push @path0, $node; | ||
1150 : | if ( newick_lbl( $node ) eq $name ) { return @path0 } | ||
1151 : | |||
1152 : | my @path; | ||
1153 : | my $imax = newick_n_desc( $node ); | ||
1154 : | for ( my $i = 1; $i <= $imax; $i++ ) { | ||
1155 : | @path = path_to_named_node( newick_desc_i( $node, $i ), $name, ( @path0, $i ) ); | ||
1156 : | if ( @path ) { return @path } | ||
1157 : | } | ||
1158 : | |||
1159 : | (); # Not found | ||
1160 : | } | ||
1161 : | |||
1162 : | |||
1163 : | #------------------------------------------------------------------------------- | ||
1164 : | # Path to node reference. | ||
1165 : | # | ||
1166 : | # @path = path_to_node_ref( $treenode, $noderef ) | ||
1167 : | #------------------------------------------------------------------------------- | ||
1168 : | sub path_to_node_ref { | ||
1169 : | my ( $node, $noderef, @path0 ) = @_; | ||
1170 : | |||
1171 : | push @path0, $node; | ||
1172 : | if ( $node eq $noderef ) { return @path0 } | ||
1173 : | |||
1174 : | my @path; | ||
1175 : | my $imax = newick_n_desc( $node ); | ||
1176 : | for ( my $i = 1; $i <= $imax; $i++ ) { | ||
1177 : | golsen | 1.12 | @path = path_to_node_ref( newick_desc_i( $node, $i ), $noderef, ( @path0, $i ) ); |
1178 : | return @path if @path; | ||
1179 : | golsen | 1.1 | } |
1180 : | |||
1181 : | (); # Not found | ||
1182 : | } | ||
1183 : | |||
1184 : | |||
1185 : | #------------------------------------------------------------------------------- | ||
1186 : | # Path to node, as defined by 1 or 3 tips. | ||
1187 : | # | ||
1188 : | # @path = path_to_node( $node, $tip1, $tip2, $tip3 ) # 3 tip names | ||
1189 : | # @path = path_to_node( $node, [ $tip1, $tip2, $tip3 ] ) # Allow array ref | ||
1190 : | # @path = path_to_node( $node, $tip1 ) # Use path_to_tip | ||
1191 : | # @path = path_to_node( $node, [ $tip1 ] ) # Use path_to_tip | ||
1192 : | #------------------------------------------------------------------------------- | ||
1193 : | sub path_to_node { | ||
1194 : | my ( $node, $tip1, $tip2, $tip3 ) = @_; | ||
1195 : | array_ref( $node ) && defined( $tip1 ) || return (); | ||
1196 : | |||
1197 : | # Allow arg 2 to be an array reference | ||
1198 : | if ( array_ref( $tip1 ) ) { ( $tip1, $tip2, $tip3 ) = @$tip1 } | ||
1199 : | |||
1200 : | my @p1 = path_to_tip( $node, $tip1 ); # Path to first tip | ||
1201 : | @p1 || return (); # Was the tip found? | ||
1202 : | defined( $tip2 ) && defined( $tip3 ) || return @p1; # Two more defined? | ||
1203 : | |||
1204 : | my @p2 = path_to_tip( $node, $tip2 ); | ||
1205 : | my @p3 = path_to_tip( $node, $tip3 ); | ||
1206 : | @p2 && @p3 || return (); # Were they found? | ||
1207 : | |||
1208 : | # Find the common prefix for each pair of paths | ||
1209 : | golsen | 1.9 | my @p12 = &common_prefix( \@p1, \@p2 ); |
1210 : | my @p13 = &common_prefix( \@p1, \@p3 ); | ||
1211 : | my @p23 = &common_prefix( \@p2, \@p3 ); | ||
1212 : | golsen | 1.1 | |
1213 : | # Return the longest common prefix of any two paths | ||
1214 : | ( @p12 >= @p13 && @p12 >= @p23 ) ? @p12 : | ||
1215 : | ( @p13 >= @p23 ) ? @p13 : | ||
1216 : | @p23 ; | ||
1217 : | } | ||
1218 : | |||
1219 : | |||
1220 : | #------------------------------------------------------------------------------- | ||
1221 : | # Distance along path. | ||
1222 : | # | ||
1223 : | # $distance = newick_path_length( @path ) | ||
1224 : | #------------------------------------------------------------------------------- | ||
1225 : | sub newick_path_length { | ||
1226 : | my $node = shift; # Discard the first node | ||
1227 : | array_ref( $node ) || return undef; | ||
1228 : | @_ ? distance_along_path_2( @_ ) : 0; | ||
1229 : | } | ||
1230 : | |||
1231 : | |||
1232 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1233 : | # This expects to get path minus root node: | ||
1234 : | # | ||
1235 : | # $distance = distance_along_path_2( @path ) | ||
1236 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1237 : | sub distance_along_path_2 { | ||
1238 : | shift; # Discard descendant number | ||
1239 : | my $node = shift; | ||
1240 : | array_ref( $node ) || return undef; | ||
1241 : | my $d1 = newick_x( $node ); | ||
1242 : | golsen | 1.8 | my $d2 = @_ ? distance_along_path_2( @_ ) : 0; |
1243 : | golsen | 1.1 | defined( $d1 ) && defined( $d2 ) ? $d1 + $d2 : undef; |
1244 : | } | ||
1245 : | |||
1246 : | |||
1247 : | #------------------------------------------------------------------------------- | ||
1248 : | # Tip-to-tip distance. | ||
1249 : | # | ||
1250 : | # $distance = tip_to_tip_distance( $tree, $tip1, $tip2 ) | ||
1251 : | #------------------------------------------------------------------------------- | ||
1252 : | sub tip_to_tip_distance { | ||
1253 : | my ( $node, $tip1, $tip2 ) = @_; | ||
1254 : | |||
1255 : | array_ref( $node ) && defined( $tip1 ) | ||
1256 : | && defined( $tip2 ) || return undef; | ||
1257 : | my @p1 = path_to_tip( $node, $tip1 ); | ||
1258 : | my @p2 = path_to_tip( $node, $tip2 ); | ||
1259 : | @p1 && @p2 || return undef; # Were they found? | ||
1260 : | |||
1261 : | # Find the unique suffixes of the two paths | ||
1262 : | golsen | 1.9 | my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost |
1263 : | golsen | 1.1 | my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0; |
1264 : | my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0; | ||
1265 : | |||
1266 : | defined( $d1 ) && defined( $d2 ) ? $d1 + $d2 : undef; | ||
1267 : | } | ||
1268 : | |||
1269 : | |||
1270 : | #------------------------------------------------------------------------------- | ||
1271 : | # Node-to-node distance. | ||
1272 : | # Nodes can be: $tipname | ||
1273 : | # [ $tipname ] | ||
1274 : | # [ $tipname1, $tipname2, $tipname3 ] | ||
1275 : | # | ||
1276 : | # $distance = node_to_node_distance( $tree, $node1, $node2 ) | ||
1277 : | #------------------------------------------------------------------------------- | ||
1278 : | sub node_to_node_distance { | ||
1279 : | my ( $node, $node1, $node2 ) = @_; | ||
1280 : | |||
1281 : | array_ref( $node ) && defined( $node1 ) | ||
1282 : | && defined( $node2 ) || return undef; | ||
1283 : | golsen | 1.8 | my @p1 = path_to_node( $node, $node1 ) or return undef; |
1284 : | my @p2 = path_to_node( $node, $node2 ) or return undef; | ||
1285 : | golsen | 1.1 | |
1286 : | # Find the unique suffixes of the two paths | ||
1287 : | golsen | 1.9 | my ( $suf1, $suf2 ) = &unique_suffixes( \@p1, \@p2 ); # Common node is lost |
1288 : | golsen | 1.1 | my $d1 = @$suf1 ? distance_along_path_2( @$suf1 ) : 0; |
1289 : | my $d2 = @$suf2 ? distance_along_path_2( @$suf2 ) : 0; | ||
1290 : | |||
1291 : | defined( $d1 ) && defined( $d2 ) ? $d1 + $d2 : undef; | ||
1292 : | } | ||
1293 : | |||
1294 : | |||
1295 : | #=============================================================================== | ||
1296 : | # Tree manipulations: | ||
1297 : | #=============================================================================== | ||
1298 : | # Copy tree. | ||
1299 : | # Lists are copied, except that references to empty lists go to undef. | ||
1300 : | # Only defined fields are added, so tree list may be shorter than 8 fields. | ||
1301 : | # | ||
1302 : | overbeek | 1.4 | # $treecopy = copy_newick_tree( $tree ) |
1303 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
1304 : | overbeek | 1.4 | sub copy_newick_tree { |
1305 : | golsen | 1.1 | my ( $node ) = @_; |
1306 : | array_ref( $node ) || return undef; | ||
1307 : | |||
1308 : | my $nn = []; # Reference to a new node structure | ||
1309 : | # Build a new descendant list, if not empty | ||
1310 : | my @dl = newick_desc_list( $node ); | ||
1311 : | overbeek | 1.4 | set_newick_desc_ref( $nn, @dl ? [ map { copy_newick_tree( $_ ) } @dl ] |
1312 : | golsen | 1.1 | : undef |
1313 : | ); | ||
1314 : | |||
1315 : | # Copy label and x, if defined | ||
1316 : | my ( $l, $x ); | ||
1317 : | if ( defined( $l = newick_lbl( $node ) ) ) { set_newick_lbl( $nn, $l ) } | ||
1318 : | if ( defined( $x = newick_x( $node ) ) ) { set_newick_x( $nn, $x ) } | ||
1319 : | |||
1320 : | # Build new comment lists, when not empty ( does not extend array unless | ||
1321 : | # necessary) | ||
1322 : | my $c; | ||
1323 : | if ( $c = newick_c1( $node ) and @$c ) { set_newick_c1( $nn, [ @$c ] ) } | ||
1324 : | if ( $c = newick_c2( $node ) and @$c ) { set_newick_c2( $nn, [ @$c ] ) } | ||
1325 : | if ( $c = newick_c3( $node ) and @$c ) { set_newick_c3( $nn, [ @$c ] ) } | ||
1326 : | if ( $c = newick_c4( $node ) and @$c ) { set_newick_c4( $nn, [ @$c ] ) } | ||
1327 : | if ( $c = newick_c5( $node ) and @$c ) { set_newick_c5( $nn, [ @$c ] ) } | ||
1328 : | |||
1329 : | $nn; | ||
1330 : | } | ||
1331 : | |||
1332 : | |||
1333 : | #------------------------------------------------------------------------------- | ||
1334 : | # Use a hash to relabel the nodes in a newick tree. | ||
1335 : | # | ||
1336 : | # $newtree = newick_relabel_nodes( $node, \%new_name ) | ||
1337 : | #------------------------------------------------------------------------------- | ||
1338 : | sub newick_relabel_nodes { | ||
1339 : | my ( $node, $new_name ) = @_; | ||
1340 : | |||
1341 : | my ( $lbl, $new ); | ||
1342 : | if ( defined( $lbl = newick_lbl( $node ) ) | ||
1343 : | && ( $lbl ne "" ) | ||
1344 : | && defined( $new = $new_name->{ $lbl } ) | ||
1345 : | ) { | ||
1346 : | set_newick_lbl( $node, $new ); | ||
1347 : | } | ||
1348 : | |||
1349 : | foreach ( newick_desc_list( $node ) ) { | ||
1350 : | newick_relabel_nodes( $_, $new_name ); | ||
1351 : | } | ||
1352 : | |||
1353 : | $node; | ||
1354 : | } | ||
1355 : | |||
1356 : | |||
1357 : | #------------------------------------------------------------------------------- | ||
1358 : | # Use a hash to relabel the nodes in a newick tree (case insensitive). | ||
1359 : | # | ||
1360 : | # $newtree = newick_relabel_nodes_i( $node, \%new_name ) | ||
1361 : | #------------------------------------------------------------------------------- | ||
1362 : | sub newick_relabel_nodes_i { | ||
1363 : | my ( $node, $new_name ) = @_; | ||
1364 : | |||
1365 : | # Add any necessary lowercase keys to the hash: | ||
1366 : | |||
1367 : | my $lc_lbl; | ||
1368 : | foreach ( keys %$new_name ) { | ||
1369 : | $lc_lbl = lc $_; | ||
1370 : | ( $lc_lbl eq $_ ) or ( $new_name->{ $lc_lbl } = $new_name->{ $_ } ); | ||
1371 : | } | ||
1372 : | |||
1373 : | newick_relabel_nodes_i2( $node, $new_name ); | ||
1374 : | } | ||
1375 : | |||
1376 : | |||
1377 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1378 : | # Do the actual relabeling | ||
1379 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1380 : | sub newick_relabel_nodes_i2 { | ||
1381 : | my ( $node, $new_name ) = @_; | ||
1382 : | |||
1383 : | my ( $lbl, $new ); | ||
1384 : | if ( defined( $lbl = newick_lbl( $node ) ) | ||
1385 : | && ( $lbl ne "" ) | ||
1386 : | && defined( $new = $new_name->{ lc $lbl } ) | ||
1387 : | ) { | ||
1388 : | set_newick_lbl( $node, $new ); | ||
1389 : | } | ||
1390 : | |||
1391 : | foreach ( newick_desc_list( $node ) ) { | ||
1392 : | newick_relabel_nodes_i2( $_, $new_name ); | ||
1393 : | } | ||
1394 : | |||
1395 : | $node; | ||
1396 : | } | ||
1397 : | |||
1398 : | |||
1399 : | #------------------------------------------------------------------------------- | ||
1400 : | # Use a hash to relabel the tips in a newick tree. | ||
1401 : | # | ||
1402 : | # $newtree = newick_relabel_tips( $node, \%new_name ) | ||
1403 : | #------------------------------------------------------------------------------- | ||
1404 : | sub newick_relabel_tips { | ||
1405 : | my ( $node, $new_name ) = @_; | ||
1406 : | |||
1407 : | my @desc = newick_desc_list( $node ); | ||
1408 : | |||
1409 : | if ( @desc ) { | ||
1410 : | foreach ( @desc ) { newick_relabel_tips( $_, $new_name ) } | ||
1411 : | } | ||
1412 : | else { | ||
1413 : | my ( $lbl, $new ); | ||
1414 : | if ( defined( $lbl = newick_lbl( $node ) ) | ||
1415 : | && ( $lbl ne "" ) | ||
1416 : | && defined( $new = $new_name->{ $lbl } ) | ||
1417 : | ) { | ||
1418 : | set_newick_lbl( $node, $new ); | ||
1419 : | } | ||
1420 : | } | ||
1421 : | |||
1422 : | $node; | ||
1423 : | } | ||
1424 : | |||
1425 : | |||
1426 : | #------------------------------------------------------------------------------- | ||
1427 : | # Use a hash to relabel the tips in a newick tree (case insensitive). | ||
1428 : | # | ||
1429 : | # $newtree = newick_relabel_tips_i( $node, \%new_name ) | ||
1430 : | #------------------------------------------------------------------------------- | ||
1431 : | sub newick_relabel_tips_i { | ||
1432 : | my ( $node, $new_name ) = @_; | ||
1433 : | |||
1434 : | # Add any necessary lowercase keys to the hash: | ||
1435 : | |||
1436 : | my $lc_lbl; | ||
1437 : | foreach ( keys %$new_name ) { | ||
1438 : | $lc_lbl = lc $_; | ||
1439 : | ( $lc_lbl eq $_ ) or ( $new_name->{ $lc_lbl } = $new_name->{ $_ } ); | ||
1440 : | } | ||
1441 : | |||
1442 : | newick_relabel_tips_i2( $node, $new_name ); | ||
1443 : | } | ||
1444 : | |||
1445 : | |||
1446 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1447 : | # Do the actual relabeling | ||
1448 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1449 : | sub newick_relabel_tips_i2 { | ||
1450 : | my ( $node, $new_name ) = @_; | ||
1451 : | |||
1452 : | my @desc = newick_desc_list( $node ); | ||
1453 : | |||
1454 : | if ( @desc ) { | ||
1455 : | foreach ( @desc ) { newick_relabel_tips_i2( $_, $new_name ) } | ||
1456 : | } | ||
1457 : | else { | ||
1458 : | my ( $lbl, $new ); | ||
1459 : | if ( defined( $lbl = newick_lbl( $node ) ) | ||
1460 : | && ( $lbl ne "" ) | ||
1461 : | && defined( $new = $new_name->{ lc $lbl } ) | ||
1462 : | ) { | ||
1463 : | set_newick_lbl( $node, $new ); | ||
1464 : | } | ||
1465 : | } | ||
1466 : | |||
1467 : | $node; | ||
1468 : | } | ||
1469 : | |||
1470 : | |||
1471 : | #------------------------------------------------------------------------------- | ||
1472 : | # Set undefined branch lenghts (except root) to length x. | ||
1473 : | # | ||
1474 : | # $n_changed = newick_set_undefined_branches( $node, $x ) | ||
1475 : | #------------------------------------------------------------------------------- | ||
1476 : | sub newick_set_undefined_branches { | ||
1477 : | my ( $node, $x, $not_root ) = @_; | ||
1478 : | |||
1479 : | my $n = 0; | ||
1480 : | if ( $not_root && ! defined( newick_x( $node ) ) ) { | ||
1481 : | set_newick_x( $node, $x ); | ||
1482 : | $n++; | ||
1483 : | } | ||
1484 : | |||
1485 : | foreach ( newick_desc_list( $node ) ) { | ||
1486 : | $n += newick_set_undefined_branches( $_, $x, 1 ); | ||
1487 : | } | ||
1488 : | |||
1489 : | $n; | ||
1490 : | } | ||
1491 : | |||
1492 : | |||
1493 : | #------------------------------------------------------------------------------- | ||
1494 : | # Set all branch lenghts (except root) to length x. | ||
1495 : | # | ||
1496 : | # $n_changed = newick_set_all_branches( $node, $x ) | ||
1497 : | #------------------------------------------------------------------------------- | ||
1498 : | sub newick_set_all_branches { | ||
1499 : | my ( $node, $x, $not_root ) = @_; | ||
1500 : | |||
1501 : | my $n = 0; | ||
1502 : | overbeek | 1.7 | if ( $not_root ) |
1503 : | { | ||
1504 : | golsen | 1.1 | set_newick_x( $node, $x ); |
1505 : | $n++; | ||
1506 : | } | ||
1507 : | |||
1508 : | overbeek | 1.7 | foreach ( newick_desc_list( $node ) ) |
1509 : | { | ||
1510 : | golsen | 1.1 | $n += newick_set_all_branches( $_, $x, 1 ); |
1511 : | } | ||
1512 : | |||
1513 : | $n; | ||
1514 : | } | ||
1515 : | |||
1516 : | |||
1517 : | #------------------------------------------------------------------------------- | ||
1518 : | overbeek | 1.7 | # Rescale all branch lenghts by factor. |
1519 : | # | ||
1520 : | # $node = newick_rescale_branches( $node, $factor ) | ||
1521 : | #------------------------------------------------------------------------------- | ||
1522 : | sub newick_rescale_branches { | ||
1523 : | my ( $node, $factor ) = @_; | ||
1524 : | |||
1525 : | my $x = newick_x( $node ); | ||
1526 : | set_newick_x( $node, $factor * $x ) if $x; | ||
1527 : | |||
1528 : | foreach ( newick_desc_list( $node ) ) | ||
1529 : | { | ||
1530 : | newick_rescale_branches( $_, $factor ); | ||
1531 : | } | ||
1532 : | |||
1533 : | $node; | ||
1534 : | } | ||
1535 : | |||
1536 : | |||
1537 : | #------------------------------------------------------------------------------- | ||
1538 : | golsen | 1.5 | # Set negative branches to zero. The original tree is modfied. |
1539 : | # | ||
1540 : | # $n_changed = newick_fix_negative_branches( $tree ) | ||
1541 : | #------------------------------------------------------------------------------- | ||
1542 : | sub newick_fix_negative_branches { | ||
1543 : | my ( $tree ) = @_; | ||
1544 : | array_ref( $tree ) or return undef; | ||
1545 : | my $n_changed = 0; | ||
1546 : | my $x = newick_x( $tree ); | ||
1547 : | if ( defined( $x ) and $x < 0 ) | ||
1548 : | { | ||
1549 : | set_newick_x( $tree, 0 ); | ||
1550 : | $n_changed++; | ||
1551 : | } | ||
1552 : | |||
1553 : | foreach ( newick_desc_list( $tree ) ) | ||
1554 : | { | ||
1555 : | $n_changed += newick_fix_negative_branches( $_ ); | ||
1556 : | } | ||
1557 : | |||
1558 : | $n_changed; | ||
1559 : | } | ||
1560 : | |||
1561 : | |||
1562 : | #------------------------------------------------------------------------------- | ||
1563 : | golsen | 1.8 | # Remove comments from a newick tree (e.g., before writing for phylip). |
1564 : | # | ||
1565 : | # $node = newick_strip_comments( $node ) | ||
1566 : | #------------------------------------------------------------------------------- | ||
1567 : | sub newick_strip_comments { | ||
1568 : | my ( $node ) = @_; | ||
1569 : | |||
1570 : | @$node = @$node[ 0 .. 2 ]; | ||
1571 : | foreach ( newick_desc_list( $node ) ) { newick_strip_comments( $_ ) } | ||
1572 : | $node; | ||
1573 : | } | ||
1574 : | |||
1575 : | |||
1576 : | #------------------------------------------------------------------------------- | ||
1577 : | golsen | 1.1 | # Normalize tree order (in place). |
1578 : | # | ||
1579 : | # ( $tree, $label1 ) = normalize_newick_tree( $tree ) | ||
1580 : | #------------------------------------------------------------------------------- | ||
1581 : | sub normalize_newick_tree { | ||
1582 : | my ( $node ) = @_; | ||
1583 : | |||
1584 : | my @descends = newick_desc_list( $node ); | ||
1585 : | if ( @descends == 0 ) { return ( $node, lc newick_lbl( $node ) ) } | ||
1586 : | |||
1587 : | golsen | 1.9 | my %hash = map { ( normalize_newick_tree($_) )[1] => $_ } @descends; |
1588 : | golsen | 1.1 | my @keylist = sort { $a cmp $b } keys %hash; |
1589 : | set_newick_desc_list( $node, map { $hash{$_} } @keylist ); | ||
1590 : | |||
1591 : | ( $node, $keylist[0] ); | ||
1592 : | } | ||
1593 : | |||
1594 : | |||
1595 : | #------------------------------------------------------------------------------- | ||
1596 : | # Reverse tree order (in place). | ||
1597 : | # | ||
1598 : | # $tree = reverse_newick_tree( $tree ) | ||
1599 : | #------------------------------------------------------------------------------- | ||
1600 : | sub reverse_newick_tree { | ||
1601 : | my ( $node ) = @_; | ||
1602 : | |||
1603 : | my @descends = newick_desc_list( $node ); | ||
1604 : | if ( @descends ) { | ||
1605 : | set_newick_desc_list( $node, reverse @descends ); | ||
1606 : | foreach ( @descends ) { reverse_newick_tree( $_ ) } | ||
1607 : | } | ||
1608 : | $node; | ||
1609 : | } | ||
1610 : | |||
1611 : | |||
1612 : | #------------------------------------------------------------------------------- | ||
1613 : | # Standard unrooted tree (in place). | ||
1614 : | # | ||
1615 : | # $stdtree = std_unrooted_newick( $tree ) | ||
1616 : | #------------------------------------------------------------------------------- | ||
1617 : | sub std_unrooted_newick { | ||
1618 : | my ( $tree ) = @_; | ||
1619 : | |||
1620 : | my ( $mintip ) = sort { lc $a cmp lc $b } newick_tip_list( $tree ); | ||
1621 : | ( normalize_newick_tree( reroot_newick_next_to_tip( $tree, $mintip ) ) )[0]; | ||
1622 : | } | ||
1623 : | |||
1624 : | |||
1625 : | #------------------------------------------------------------------------------- | ||
1626 : | # Move largest groups to periphery of tree (in place). | ||
1627 : | # | ||
1628 : | # dir <= -2 for up-sweeping tree (big groups always first), | ||
1629 : | # = -1 for big group first, balanced tree, | ||
1630 : | # = 0 for balanced tree, | ||
1631 : | # = 1 for small group first, balanced tree, and | ||
1632 : | # >= 2 for down-sweeping tree (small groups always top) | ||
1633 : | # | ||
1634 : | # $tree = aesthetic_newick_tree( $treeref, $dir ) | ||
1635 : | #------------------------------------------------------------------------------- | ||
1636 : | sub aesthetic_newick_tree { | ||
1637 : | my ( $tree, $dir ) = @_; | ||
1638 : | my %cnt; | ||
1639 : | |||
1640 : | $dir = ! $dir ? 0 : # Undefined or zero | ||
1641 : | $dir <= -2 ? -1000000 : | ||
1642 : | $dir < 0 ? -1 : | ||
1643 : | $dir >= 2 ? 1000000 : | ||
1644 : | 1 ; | ||
1645 : | build_tip_count_hash( $tree, \%cnt ); | ||
1646 : | reorder_by_tip_count( $tree, \%cnt, $dir ); | ||
1647 : | } | ||
1648 : | |||
1649 : | |||
1650 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1651 : | # Build a hash to look up the number of descendants of each node. | ||
1652 : | # Access count with $cntref->{$noderef} | ||
1653 : | # | ||
1654 : | # $count = build_tip_count_hash( $node, $cnt_hash_ref ) | ||
1655 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1656 : | sub build_tip_count_hash { | ||
1657 : | my ( $node, $cntref ) = @_; | ||
1658 : | my ( $i, $cnt ); | ||
1659 : | |||
1660 : | if ( newick_n_desc( $node ) < 1 ) { $cnt = 1 } | ||
1661 : | else { | ||
1662 : | $cnt = 0; | ||
1663 : | foreach ( newick_desc_list( $node ) ) { | ||
1664 : | $cnt += build_tip_count_hash( $_, $cntref ); | ||
1665 : | } | ||
1666 : | } | ||
1667 : | |||
1668 : | $cntref->{$node} = $cnt; | ||
1669 : | $cnt; | ||
1670 : | } | ||
1671 : | |||
1672 : | |||
1673 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1674 : | # $node = reorder_by_tip_count( $node, $cntref, $dir ) | ||
1675 : | # dir < 0 for upward branch (big group first), | ||
1676 : | # = 0 for no change, and | ||
1677 : | # > 0 for downward branch (small group first). | ||
1678 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1679 : | sub reorder_by_tip_count { | ||
1680 : | my ( $node, $cntref, $dir ) = @_; | ||
1681 : | |||
1682 : | my $nd = newick_n_desc( $node ); | ||
1683 : | if ( $nd < 1 ) { return $node } # Do nothing to a tip | ||
1684 : | |||
1685 : | # Reorder this subtree: | ||
1686 : | |||
1687 : | my $dl_ref = newick_desc_ref( $node ); | ||
1688 : | if ( $dir < 0 ) { # Big group first | ||
1689 : | @$dl_ref = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref; | ||
1690 : | } | ||
1691 : | elsif ( $dir > 0 ) { # Small group first | ||
1692 : | @$dl_ref = sort { $cntref->{$a} <=> $cntref->{$b} } @$dl_ref; | ||
1693 : | } | ||
1694 : | |||
1695 : | # Reorder within descendant subtrees: | ||
1696 : | |||
1697 : | my $step = 0; | ||
1698 : | if ( abs( $dir ) < 1e5 ) { | ||
1699 : | $dir = 1 - $nd; # Midgroup => as is | ||
1700 : | # $dir = 1 - $nd + ( $dir < 0 ? -0.5 : 0.5 ); # Midgroup => outward | ||
1701 : | $step = 2; | ||
1702 : | } | ||
1703 : | |||
1704 : | for ( my $i = 0; $i < $nd; $i++ ) { | ||
1705 : | reorder_by_tip_count( $dl_ref->[$i], $cntref, $dir ); | ||
1706 : | $dir += $step; | ||
1707 : | } | ||
1708 : | |||
1709 : | $node; | ||
1710 : | } | ||
1711 : | |||
1712 : | |||
1713 : | #------------------------------------------------------------------------------- | ||
1714 : | # Move smallest groups to periphery of tree (in place). | ||
1715 : | # | ||
1716 : | # dir <= -2 for up-sweeping tree (big groups always first), | ||
1717 : | # = -1 for big group first, balanced tree, | ||
1718 : | # = 0 for balanced tree, | ||
1719 : | # = 1 for small group first, balanced tree, and | ||
1720 : | # >= 2 for down-sweeping tree (small groups always top) | ||
1721 : | # | ||
1722 : | # $tree = unaesthetic_newick_tree( $treeref, $dir ) | ||
1723 : | #------------------------------------------------------------------------------- | ||
1724 : | golsen | 1.9 | sub unaesthetic_newick_tree |
1725 : | { | ||
1726 : | golsen | 1.1 | my ( $tree, $dir ) = @_; |
1727 : | my %cnt; | ||
1728 : | |||
1729 : | $dir = ! $dir ? 0 : # Undefined or zero | ||
1730 : | $dir <= -2 ? -1000000 : | ||
1731 : | $dir < 0 ? -1 : | ||
1732 : | $dir >= 2 ? 1000000 : | ||
1733 : | 1 ; | ||
1734 : | build_tip_count_hash( $tree, \%cnt ); | ||
1735 : | reorder_against_tip_count( $tree, \%cnt, $dir ); | ||
1736 : | } | ||
1737 : | |||
1738 : | |||
1739 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1740 : | # $node = reorder_by_tip_count( $node, $cntref, $dir ) | ||
1741 : | # dir < 0 for upward branch (big group first), | ||
1742 : | # = 0 for no change, and | ||
1743 : | # > 0 for downward branch (small group first). | ||
1744 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
1745 : | golsen | 1.9 | sub reorder_against_tip_count |
1746 : | { | ||
1747 : | golsen | 1.1 | my ( $node, $cntref, $dir ) = @_; |
1748 : | |||
1749 : | my $nd = newick_n_desc( $node ); | ||
1750 : | if ( $nd < 1 ) { return $node } # Do nothing to a tip | ||
1751 : | |||
1752 : | # Reorder this subtree: | ||
1753 : | |||
1754 : | my $dl_ref = newick_desc_ref( $node ); | ||
1755 : | if ( $dir > 0 ) { # Big group first | ||
1756 : | @$dl_ref = sort { $cntref->{$b} <=> $cntref->{$a} } @$dl_ref; | ||
1757 : | } | ||
1758 : | elsif ( $dir < 0 ) { # Small group first | ||
1759 : | @$dl_ref = sort { $cntref->{$a} <=> $cntref->{$b} } @$dl_ref; | ||
1760 : | } | ||
1761 : | |||
1762 : | # Reorder within descendant subtrees: | ||
1763 : | |||
1764 : | my $step = 0; | ||
1765 : | if (abs( $dir ) < 1e5) { | ||
1766 : | $dir = 1 - $nd; # Midgroup => as is | ||
1767 : | # $dir = 1 - $nd + ( $dir < 0 ? -0.5 : 0.5 ); # Midgroup => outward | ||
1768 : | $step = 2; | ||
1769 : | } | ||
1770 : | |||
1771 : | for ( my $i = 0; $i < $nd; $i++ ) { | ||
1772 : | reorder_by_tip_count( $dl_ref->[$i], $cntref, $dir ); | ||
1773 : | $dir += $step; | ||
1774 : | } | ||
1775 : | |||
1776 : | $node; | ||
1777 : | } | ||
1778 : | |||
1779 : | |||
1780 : | #------------------------------------------------------------------------------- | ||
1781 : | # Randomize descendant order at each node (in place). | ||
1782 : | # | ||
1783 : | # $tree = random_order_newick_tree( $tree ) | ||
1784 : | #------------------------------------------------------------------------------- | ||
1785 : | golsen | 1.9 | sub random_order_newick_tree |
1786 : | { | ||
1787 : | golsen | 1.1 | my ( $node ) = @_; |
1788 : | |||
1789 : | my $nd = newick_n_desc( $node ); | ||
1790 : | if ( $nd < 1 ) { return $node } # Do nothing to a tip | ||
1791 : | |||
1792 : | # Reorder this subtree: | ||
1793 : | |||
1794 : | my $dl_ref = newick_desc_ref( $node ); | ||
1795 : | golsen | 1.9 | @$dl_ref = &random_order( @$dl_ref ); |
1796 : | golsen | 1.1 | |
1797 : | # Reorder descendants: | ||
1798 : | |||
1799 : | foreach ( @$dl_ref ) { random_order_newick_tree( $_ ) } | ||
1800 : | |||
1801 : | $node; | ||
1802 : | } | ||
1803 : | |||
1804 : | |||
1805 : | #------------------------------------------------------------------------------- | ||
1806 : | # Reroot a tree to the node that lies at the end of a path. | ||
1807 : | # | ||
1808 : | # $newtree = reroot_newick_by_path( @path ) | ||
1809 : | #------------------------------------------------------------------------------- | ||
1810 : | golsen | 1.9 | sub reroot_newick_by_path |
1811 : | { | ||
1812 : | golsen | 1.1 | my ( $node1, $path1, @rest ) = @_; |
1813 : | array_ref( $node1 ) || return undef; # Always expect a node | ||
1814 : | |||
1815 : | defined( $path1 ) && @rest || return $node1; # If no path, we're done | ||
1816 : | |||
1817 : | my $node2 = $rest[0]; # Next element in path is node 2 | ||
1818 : | newick_desc_i( $node1, $path1 ) eq $node2 || return undef; # Check link | ||
1819 : | |||
1820 : | # Remove node 2 from node 1 descendant list. Could use a simple splice: | ||
1821 : | # | ||
1822 : | # splice( @$dl1, $path1-1, 1 ); | ||
1823 : | # | ||
1824 : | golsen | 1.8 | # But the following maintains the cyclic order of the nodes: |
1825 : | golsen | 1.1 | |
1826 : | my $dl1 = newick_desc_ref( $node1 ); | ||
1827 : | my $nd1 = @$dl1; | ||
1828 : | if ( $path1 == 1 ) { shift @$dl1 } | ||
1829 : | elsif ( $path1 == $nd1 ) { pop @$dl1 } | ||
1830 : | else { @$dl1 = ( @$dl1[ $path1 .. $nd1-1 ] | ||
1831 : | , @$dl1[ 0 .. $path1-2 ] | ||
1832 : | ) | ||
1833 : | golsen | 1.8 | } |
1834 : | golsen | 1.1 | |
1835 : | # Append node 1 to node 2 descendant list (does not alter numbering): | ||
1836 : | |||
1837 : | my $dl2 = newick_desc_ref( $node2 ); | ||
1838 : | if ( array_ref( $dl2 ) ) { push @$dl2, $node1 } | ||
1839 : | golsen | 1.8 | else { set_newick_desc_list( $node2, $node1 ) } |
1840 : | golsen | 1.1 | |
1841 : | # Move c1 comments from node 1 to node 2: | ||
1842 : | |||
1843 : | my $C11 = newick_c1( $node1 ); | ||
1844 : | my $C12 = newick_c1( $node2 ); | ||
1845 : | ! defined( $C11 ) || set_newick_c1( $node1, undef ); # Remove them from node 1 | ||
1846 : | golsen | 1.8 | if ( $C12 && @$C12 ) { # If node 2 comments and |
1847 : | if ( $C11 && @$C11 ) { unshift @$C12, @$C11 } # Node 1, prefix 1 to 2 | ||
1848 : | golsen | 1.1 | } |
1849 : | golsen | 1.8 | elsif ( $C11 && @$C11 ) { set_newick_c1( $node2, $C11 ) } # Otherwise move node 1 link |
1850 : | golsen | 1.1 | |
1851 : | # Swap branch lengths and comments for reversal of link direction: | ||
1852 : | |||
1853 : | my $x1 = newick_x( $node1 ); | ||
1854 : | my $x2 = newick_x( $node2 ); | ||
1855 : | ! defined( $x1 ) && ! defined ( $x2 ) || set_newick_x( $node1, $x2 ); | ||
1856 : | ! defined( $x1 ) && ! defined ( $x2 ) || set_newick_x( $node2, $x1 ); | ||
1857 : | |||
1858 : | my $c41 = newick_c4( $node1 ); | ||
1859 : | my $c42 = newick_c4( $node2 ); | ||
1860 : | ! defined( $c42 ) || ! @$c42 || set_newick_c4( $node1, $c42 ); | ||
1861 : | ! defined( $c41 ) || ! @$c41 || set_newick_c4( $node2, $c41 ); | ||
1862 : | |||
1863 : | my $c51 = newick_c5( $node1 ); | ||
1864 : | my $c52 = newick_c5( $node2 ); | ||
1865 : | ! defined( $c52 ) || ! @$c52 || set_newick_c5( $node1, $c52 ); | ||
1866 : | ! defined( $c51 ) || ! @$c51 || set_newick_c5( $node2, $c51 ); | ||
1867 : | |||
1868 : | reroot_newick_by_path( @rest ); # Node 2 is first element of rest | ||
1869 : | } | ||
1870 : | |||
1871 : | |||
1872 : | #------------------------------------------------------------------------------- | ||
1873 : | # Move root of tree to named tip. | ||
1874 : | # | ||
1875 : | # $newtree = reroot_newick_to_tip( $tree, $tip ) | ||
1876 : | #------------------------------------------------------------------------------- | ||
1877 : | sub reroot_newick_to_tip { | ||
1878 : | my ( $tree, $tipname ) = @_; | ||
1879 : | reroot_newick_by_path( path_to_tip( $tree, $tipname ) ); | ||
1880 : | } | ||
1881 : | |||
1882 : | |||
1883 : | #------------------------------------------------------------------------------- | ||
1884 : | # Move root of tree to be node adjacent to a named tip. | ||
1885 : | # | ||
1886 : | # $newtree = reroot_newick_next_to_tip( $tree, $tip ) | ||
1887 : | #------------------------------------------------------------------------------- | ||
1888 : | sub reroot_newick_next_to_tip { | ||
1889 : | my ( $tree, $tipname ) = @_; | ||
1890 : | my @path = path_to_tip( $tree, $tipname ); | ||
1891 : | @path || return undef; | ||
1892 : | @path == 1 ? reroot_newick_by_path( $tree, 1, newick_desc_i( $tree, 1 ) ) | ||
1893 : | : reroot_newick_by_path( @path[0 .. @path-3] ); | ||
1894 : | } | ||
1895 : | |||
1896 : | |||
1897 : | #------------------------------------------------------------------------------- | ||
1898 : | # Move root of tree to a node, defined by 1 or 3 tip names. | ||
1899 : | # | ||
1900 : | # $newtree = reroot_newick_to_node( $tree, @node ) | ||
1901 : | #------------------------------------------------------------------------------- | ||
1902 : | sub reroot_newick_to_node { | ||
1903 : | reroot_newick_by_path( path_to_node( @_ ) ); | ||
1904 : | } | ||
1905 : | |||
1906 : | |||
1907 : | #------------------------------------------------------------------------------- | ||
1908 : | # Move root of tree to a node, defined by reference. | ||
1909 : | # | ||
1910 : | # $newtree = reroot_newick_to_node_ref( $tree, $noderef ) | ||
1911 : | #------------------------------------------------------------------------------- | ||
1912 : | sub reroot_newick_to_node_ref { | ||
1913 : | my ( $tree, $node ) = @_; | ||
1914 : | reroot_newick_by_path( path_to_node_ref( $tree, $node ) ); | ||
1915 : | } | ||
1916 : | |||
1917 : | |||
1918 : | #------------------------------------------------------------------------------- | ||
1919 : | golsen | 1.9 | # Reroot a newick tree along the path between 2 nodes: |
1920 : | # | ||
1921 : | # $tree = reroot_newick_between_nodes( $tree, $node1, $node2, $fraction ) | ||
1922 : | #------------------------------------------------------------------------------- | ||
1923 : | sub reroot_newick_between_nodes | ||
1924 : | { | ||
1925 : | my ( $tree, $node1, $node2, $fraction ) = @_; | ||
1926 : | array_ref( $tree ) or return undef; | ||
1927 : | $fraction >= 0 && $fraction <= 1 or return undef; | ||
1928 : | |||
1929 : | # Find the paths to the nodes: | ||
1930 : | |||
1931 : | my @path1 = path_to_node( $tree, $node1 ) or return undef; | ||
1932 : | my @path2 = path_to_node( $tree, $node2 ) or return undef; | ||
1933 : | |||
1934 : | golsen | 1.12 | reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction ) |
1935 : | } | ||
1936 : | |||
1937 : | |||
1938 : | #------------------------------------------------------------------------------- | ||
1939 : | # Reroot a newick tree along the path between 2 nodes: | ||
1940 : | # | ||
1941 : | # $tree = reroot_newick_between_node_refs( $tree, $node1, $node2, $fraction ) | ||
1942 : | #------------------------------------------------------------------------------- | ||
1943 : | sub reroot_newick_between_node_refs | ||
1944 : | { | ||
1945 : | my ( $tree, $node1, $node2, $fraction ) = @_; | ||
1946 : | array_ref( $tree ) or return undef; | ||
1947 : | |||
1948 : | # Find the paths to the nodes: | ||
1949 : | |||
1950 : | my @path1 = path_to_node_ref( $tree, $node1 ) or return undef; | ||
1951 : | my @path2 = path_to_node_ref( $tree, $node2 ) or return undef;; | ||
1952 : | |||
1953 : | reroot_newick_between_nodes_by_path( $tree, \@path1, \@path2, $fraction ) | ||
1954 : | } | ||
1955 : | |||
1956 : | |||
1957 : | #------------------------------------------------------------------------------- | ||
1958 : | # Reroot a newick tree along the path between 2 nodes defined by paths: | ||
1959 : | # | ||
1960 : | # $tree = reroot_newick_between_nodes_by_path( $tree, $path1, $path2, $fraction ) | ||
1961 : | #------------------------------------------------------------------------------- | ||
1962 : | sub reroot_newick_between_nodes_by_path | ||
1963 : | { | ||
1964 : | my ( $tree, $path1, $path2, $fraction ) = @_; | ||
1965 : | array_ref( $tree ) and array_ref( $path1 ) and array_ref( $path2 ) | ||
1966 : | or return undef; | ||
1967 : | $fraction >= 0 && $fraction <= 1 or return undef; | ||
1968 : | |||
1969 : | my @path1 = @$path1; | ||
1970 : | my @path2 = @$path2; | ||
1971 : | |||
1972 : | golsen | 1.9 | # Trim the common prefix, saving it: |
1973 : | |||
1974 : | my @prefix = (); | ||
1975 : | while ( $path1[1] == $path2[1] ) | ||
1976 : | { | ||
1977 : | push @prefix, splice( @path1, 0, 2 ); | ||
1978 : | splice( @path2, 0, 2 ); | ||
1979 : | } | ||
1980 : | |||
1981 : | my ( @path, $dist ); | ||
1982 : | if ( @path1 < 3 ) | ||
1983 : | { | ||
1984 : | @path2 >= 3 or return undef; # node1 = node2 | ||
1985 : | $dist = $fraction * newick_path_length( @path2 ); | ||
1986 : | @path = @path2; | ||
1987 : | } | ||
1988 : | elsif ( @path2 < 3 ) | ||
1989 : | { | ||
1990 : | $dist = ( 1 - $fraction ) * newick_path_length( @path1 ); | ||
1991 : | @path = @path1; | ||
1992 : | } | ||
1993 : | else | ||
1994 : | { | ||
1995 : | my $dist1 = newick_path_length( @path1 ); | ||
1996 : | my $dist2 = newick_path_length( @path2 ); | ||
1997 : | $dist = $fraction * ( $dist1 + $dist2 ) - $dist1; | ||
1998 : | @path = ( $dist <= 0 ) ? @path1 : @path2; | ||
1999 : | $dist = abs( $dist ); | ||
2000 : | } | ||
2001 : | |||
2002 : | # Descend tree until we reach the insertion branch: | ||
2003 : | |||
2004 : | my $x; | ||
2005 : | while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) ) | ||
2006 : | { | ||
2007 : | $dist -= $x; | ||
2008 : | push @prefix, splice( @path, 0, 2 ); | ||
2009 : | } | ||
2010 : | |||
2011 : | # Insert the new node: | ||
2012 : | |||
2013 : | my $newnode = [ [ $path[2] ], undef, $dist ]; | ||
2014 : | set_newick_desc_i( $path[0], $path[1], $newnode ); | ||
2015 : | set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) ); | ||
2016 : | |||
2017 : | # We can now build the path from root to the new node | ||
2018 : | |||
2019 : | reroot_newick_by_path( @prefix, @path[0,1], $newnode ); | ||
2020 : | } | ||
2021 : | |||
2022 : | |||
2023 : | #------------------------------------------------------------------------------- | ||
2024 : | golsen | 1.1 | # Move root of tree to an approximate midpoint. |
2025 : | # | ||
2026 : | # $newtree = reroot_newick_to_approx_midpoint( $tree ) | ||
2027 : | #------------------------------------------------------------------------------- | ||
2028 : | sub reroot_newick_to_approx_midpoint { | ||
2029 : | my ( $tree ) = @_; | ||
2030 : | golsen | 1.5 | |
2031 : | golsen | 1.1 | # Compile average tip to node distances assending |
2032 : | |||
2033 : | my $dists1 = average_to_tips_1( $tree ); | ||
2034 : | |||
2035 : | golsen | 1.12 | # Compile average tip to node distances descending, returning midpoint |
2036 : | # cadidates as a list of [ $node1, $node2, $fraction ] | ||
2037 : | |||
2038 : | my @mids = average_to_tips_2( $dists1, undef, undef ); | ||
2039 : | |||
2040 : | # Reroot to first midpoint candidate | ||
2041 : | |||
2042 : | return $tree if ! @mids; | ||
2043 : | my ( $node1, $node2, $fraction ) = @{ $mids[0] }; | ||
2044 : | reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 ); | ||
2045 : | } | ||
2046 : | |||
2047 : | |||
2048 : | #------------------------------------------------------------------------------- | ||
2049 : | # Move root of tree to a midpoint. | ||
2050 : | # | ||
2051 : | # $newtree = reroot_newick_to_midpoint( $tree ) | ||
2052 : | #------------------------------------------------------------------------------- | ||
2053 : | sub reroot_newick_to_midpoint { | ||
2054 : | my ( $tree ) = @_; | ||
2055 : | |||
2056 : | # Compile average tip to node distances assending | ||
2057 : | |||
2058 : | my $dists1 = average_to_tips_1( $tree ); | ||
2059 : | golsen | 1.1 | |
2060 : | golsen | 1.12 | # Compile average tip to node distances descending, returning midpoint |
2061 : | # [ $node1, $node2, $fraction ] | ||
2062 : | golsen | 1.1 | |
2063 : | golsen | 1.12 | my @mids = average_to_tips_2( $dists1, undef, undef ); |
2064 : | golsen | 1.1 | |
2065 : | golsen | 1.12 | @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree; |
2066 : | golsen | 1.1 | } |
2067 : | |||
2068 : | |||
2069 : | golsen | 1.12 | #------------------------------------------------------------------------------- |
2070 : | # Compile average tip to node distances assending | ||
2071 : | #------------------------------------------------------------------------------- | ||
2072 : | golsen | 1.1 | sub average_to_tips_1 { |
2073 : | my ( $node ) = @_; | ||
2074 : | |||
2075 : | my @desc_dists = map { average_to_tips_1( $_ ) } newick_desc_list( $node ); | ||
2076 : | my $x_below = 0; | ||
2077 : | if ( @desc_dists ) | ||
2078 : | { | ||
2079 : | foreach ( @desc_dists ) { $x_below += $_->[0] } | ||
2080 : | $x_below /= @desc_dists; | ||
2081 : | } | ||
2082 : | golsen | 1.12 | |
2083 : | golsen | 1.1 | my $x = newick_x( $node ) || 0; |
2084 : | my $x_net = $x_below + $x; | ||
2085 : | |||
2086 : | [ $x_net, $x, $x_below, [ @desc_dists ], $node ] | ||
2087 : | } | ||
2088 : | |||
2089 : | golsen | 1.9 | |
2090 : | golsen | 1.12 | #------------------------------------------------------------------------------- |
2091 : | # Compile average tip to node distances descending, returning midpoint as | ||
2092 : | # [ $node1, $node2, $fraction_of_dist_between ] | ||
2093 : | #------------------------------------------------------------------------------- | ||
2094 : | golsen | 1.1 | sub average_to_tips_2 { |
2095 : | my ( $dists1, $x_above, $anc_node ) = @_; | ||
2096 : | my ( undef, $x, $x_below, $desc_list, $node ) = @$dists1; | ||
2097 : | |||
2098 : | # Are we done? Root is in this node's branch, or "above"? | ||
2099 : | |||
2100 : | golsen | 1.12 | my @mids = (); |
2101 : | golsen | 1.1 | if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) ) |
2102 : | { | ||
2103 : | # At this point the root can only be in this node's branch, | ||
2104 : | # or "above" it in the current rooting of the tree (which | ||
2105 : | # would mean that the midpoint is actually down a different | ||
2106 : | # path from the root of the current tree). | ||
2107 : | # | ||
2108 : | # Is the root in the current branch? | ||
2109 : | |||
2110 : | if ( ( $x_below + $x ) >= $x_above ) | ||
2111 : | { | ||
2112 : | golsen | 1.12 | # We will need to make a new node for the root, $fract of |
2113 : | # the way from $node to $anc_node: | ||
2114 : | my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 ) | ||
2115 : | : 0.5; | ||
2116 : | push @mids, [ $node, $anc_node, $fract ]; | ||
2117 : | golsen | 1.1 | } |
2118 : | } | ||
2119 : | |||
2120 : | golsen | 1.12 | # The root might be somewhere below this node: |
2121 : | golsen | 1.1 | |
2122 : | golsen | 1.5 | my $n_1 = @$desc_list - ( $anc_node ? 0 : 1 ); |
2123 : | golsen | 1.1 | my $ttl_dist = ( @$desc_list * $x_below ) + ( defined( $x_above ) ? ( $x_above + $x ) : 0 ); |
2124 : | |||
2125 : | foreach ( @$desc_list ) | ||
2126 : | { | ||
2127 : | # If input tree is tip_rooted, $n-1 can be 0, so: | ||
2128 : | |||
2129 : | my $above2 = $n_1 ? ( ( $ttl_dist - $_->[0] ) / $n_1 ) : 0; | ||
2130 : | golsen | 1.12 | push @mids, average_to_tips_2( $_, $above2, $node ); |
2131 : | golsen | 1.1 | } |
2132 : | |||
2133 : | golsen | 1.12 | return @mids; |
2134 : | golsen | 1.1 | } |
2135 : | |||
2136 : | golsen | 1.9 | |
2137 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
2138 : | golsen | 1.5 | # Move root of tree to an approximate midpoint. Weight by tips. |
2139 : | # | ||
2140 : | # $newtree = reroot_newick_to_approx_midpoint_w( $tree ) | ||
2141 : | #------------------------------------------------------------------------------- | ||
2142 : | sub reroot_newick_to_approx_midpoint_w { | ||
2143 : | my ( $tree ) = @_; | ||
2144 : | |||
2145 : | golsen | 1.12 | # Compile average tip to node distances assending from tips |
2146 : | |||
2147 : | my $dists1 = average_to_tips_1_w( $tree ); | ||
2148 : | |||
2149 : | # Compile average tip to node distances descending, returning midpoints | ||
2150 : | |||
2151 : | my @mids = average_to_tips_2_w( $dists1, undef, undef, undef ); | ||
2152 : | |||
2153 : | # Reroot to first midpoint candidate | ||
2154 : | |||
2155 : | return $tree if ! @mids; | ||
2156 : | my ( $node1, $node2, $fraction ) = @{ $mids[0] }; | ||
2157 : | reroot_newick_to_node_ref( $tree, $fraction >= 0.5 ? $node2 : $node1 ); | ||
2158 : | } | ||
2159 : | |||
2160 : | |||
2161 : | #------------------------------------------------------------------------------- | ||
2162 : | # Move root of tree to an approximate midpoint. Weight by tips. | ||
2163 : | # | ||
2164 : | # $newtree = reroot_newick_to_midpoint_w( $tree ) | ||
2165 : | #------------------------------------------------------------------------------- | ||
2166 : | sub reroot_newick_to_midpoint_w { | ||
2167 : | my ( $tree ) = @_; | ||
2168 : | |||
2169 : | golsen | 1.5 | # Compile average tip to node distances assending |
2170 : | |||
2171 : | my $dists1 = average_to_tips_1_w( $tree ); | ||
2172 : | |||
2173 : | # Compile average tip to node distances descending, returning midpoint node | ||
2174 : | |||
2175 : | golsen | 1.12 | my @mids = average_to_tips_2_w( $dists1, undef, undef, undef ); |
2176 : | golsen | 1.5 | |
2177 : | golsen | 1.12 | # Reroot at first candidate midpoint |
2178 : | golsen | 1.5 | |
2179 : | golsen | 1.12 | @mids ? reroot_newick_between_node_refs( $tree, @{ $mids[0] } ) : $tree; |
2180 : | golsen | 1.5 | } |
2181 : | |||
2182 : | |||
2183 : | sub average_to_tips_1_w { | ||
2184 : | my ( $node ) = @_; | ||
2185 : | |||
2186 : | my @desc_dists = map { average_to_tips_1_w( $_ ) } newick_desc_list( $node ); | ||
2187 : | my $x_below = 0; | ||
2188 : | my $n_below = 1; | ||
2189 : | if ( @desc_dists ) | ||
2190 : | { | ||
2191 : | $n_below = 0; | ||
2192 : | my $n; | ||
2193 : | foreach ( @desc_dists ) | ||
2194 : | { | ||
2195 : | $n_below += $n = $_->[1]; | ||
2196 : | $x_below += $n * $_->[0]; | ||
2197 : | } | ||
2198 : | $x_below /= $n_below; | ||
2199 : | } | ||
2200 : | golsen | 1.12 | |
2201 : | golsen | 1.5 | my $x = newick_x( $node ) || 0; |
2202 : | my $x_net = $x_below + $x; | ||
2203 : | |||
2204 : | [ $x_net, $n_below, $x, $x_below, [ @desc_dists ], $node ] | ||
2205 : | } | ||
2206 : | |||
2207 : | |||
2208 : | sub average_to_tips_2_w { | ||
2209 : | my ( $dists1, $x_above, $n_above, $anc_node ) = @_; | ||
2210 : | my ( undef, $n_below, $x, $x_below, $desc_list, $node ) = @$dists1; | ||
2211 : | |||
2212 : | # Are we done? Root is in this node's branch, or "above"? | ||
2213 : | |||
2214 : | golsen | 1.12 | my @mids = (); |
2215 : | golsen | 1.5 | if ( defined( $x_above ) && ( ( $x_above + $x ) >= $x_below ) ) |
2216 : | { | ||
2217 : | # At this point the root can only be in this node's branch, | ||
2218 : | # or "above" it in the current rooting of the tree (which | ||
2219 : | # would mean that the midpoint is actually down a different | ||
2220 : | # path from the root of the current tree). | ||
2221 : | # | ||
2222 : | golsen | 1.12 | # Is their a root in the current branch? |
2223 : | golsen | 1.5 | |
2224 : | if ( ( $x_below + $x ) >= $x_above ) | ||
2225 : | { | ||
2226 : | golsen | 1.12 | # We will need to make a new node for the root, $fract of |
2227 : | # the way from $node to $anc_node: | ||
2228 : | my $fract = ( $x > 0 ) ? 0.5 * ( ( $x_above - $x_below ) / $x + 1 ) | ||
2229 : | : 0.5; | ||
2230 : | push @mids, [ $node, $anc_node, $fract ]; | ||
2231 : | golsen | 1.5 | } |
2232 : | } | ||
2233 : | |||
2234 : | # The root must be some where below this node: | ||
2235 : | |||
2236 : | $n_above ||= 0; | ||
2237 : | my $n = $n_above + $n_below; | ||
2238 : | my $ttl_w_dist = ( $n_below * $x_below ) | ||
2239 : | + ( defined( $x_above ) ? $n_above * ( $x_above + $x ) : 0 ); | ||
2240 : | |||
2241 : | foreach ( @$desc_list ) | ||
2242 : | { | ||
2243 : | my $n_2 = $_->[1]; # n in subtree | ||
2244 : | my $n_above2 = $n - $n_2; # tip rooted has 1 above | ||
2245 : | |||
2246 : | # If input tree is tip_rooted, $n_above2 can be 0, so: | ||
2247 : | |||
2248 : | my $x_above2 = $n_above2 ? ( ( $ttl_w_dist - $n_2 * $_->[0] ) / $n_above2 ) | ||
2249 : | : 0; | ||
2250 : | golsen | 1.12 | push @mids, average_to_tips_2_w( $_, $x_above2, $n_above2 || 1, $node ); |
2251 : | golsen | 1.5 | } |
2252 : | |||
2253 : | golsen | 1.12 | return @mids; |
2254 : | golsen | 1.5 | } |
2255 : | |||
2256 : | golsen | 1.9 | |
2257 : | golsen | 1.5 | #------------------------------------------------------------------------------- |
2258 : | golsen | 1.1 | # Move root of tree from tip to adjacent node. |
2259 : | # | ||
2260 : | # $newtree = uproot_tip_rooted_newick( $tree ) | ||
2261 : | #------------------------------------------------------------------------------- | ||
2262 : | sub uproot_tip_rooted_newick { | ||
2263 : | my ( $node ) = @_; | ||
2264 : | newick_is_tip_rooted( $node ) || return $node; | ||
2265 : | |||
2266 : | # Path to the sole descendant: | ||
2267 : | |||
2268 : | reroot_newick_by_path( $node, 1, newick_desc_i( $node, 1 ) ); | ||
2269 : | } | ||
2270 : | |||
2271 : | |||
2272 : | #------------------------------------------------------------------------------- | ||
2273 : | # Remove root bifurcation. | ||
2274 : | # | ||
2275 : | # Root node label, label comment and descendant list comment are discarded. | ||
2276 : | # | ||
2277 : | # $newtree = uproot_newick( $tree ) | ||
2278 : | #------------------------------------------------------------------------------- | ||
2279 : | sub uproot_newick { | ||
2280 : | my ( $node0 ) = @_; | ||
2281 : | newick_is_rooted( $node0 ) || return $node0; | ||
2282 : | |||
2283 : | my ( $node1, $node2 ) = newick_desc_list( $node0 ); | ||
2284 : | |||
2285 : | # Ensure that node1 has at least 1 descendant | ||
2286 : | |||
2287 : | if ( newick_n_desc( $node1 ) ) { | ||
2288 : | push @{ newick_desc_ref( $node1 ) }, $node2; # Add node2 to descend list | ||
2289 : | } | ||
2290 : | |||
2291 : | # Or node2 has at least 1 descendant | ||
2292 : | |||
2293 : | elsif ( newick_n_desc( $node2 ) ) { | ||
2294 : | unshift @{ newick_desc_ref( $node2 ) }, $node1; # Add node1 to descend list | ||
2295 : | ( $node1, $node2 ) = ( $node2, $node1 ); # And reverse labels | ||
2296 : | } | ||
2297 : | |||
2298 : | # We could make this into a tip rooted tree, but for now: | ||
2299 : | |||
2300 : | else { return $node0 } | ||
2301 : | |||
2302 : | # Prefix node1 branch to that of node2: | ||
2303 : | |||
2304 : | add_to_newick_branch( $node2, $node1 ); | ||
2305 : | set_newick_x( $node1, undef ); | ||
2306 : | |||
2307 : | # Tree prefix comment lists (as references): | ||
2308 : | |||
2309 : | my $C10 = newick_c1( $node0 ); | ||
2310 : | my $C11 = newick_c1( $node1 ); | ||
2311 : | golsen | 1.9 | if ( $C11 && @$C11 ) { |
2312 : | golsen | 1.1 | if ( $C10 && @$C10 ) { unshift @$C11, @$C10 } # Prefix to node1 comments |
2313 : | } | ||
2314 : | elsif ( $C10 && @$C10 ) { | ||
2315 : | set_newick_c1( $node1, $C10 ) # Or move node0 comments to node1 | ||
2316 : | } | ||
2317 : | |||
2318 : | $node1; | ||
2319 : | } | ||
2320 : | |||
2321 : | |||
2322 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2323 : | # Prefix branch of node2 to that of node1: | ||
2324 : | # | ||
2325 : | # $node1 = add_to_newick_branch( $node1, $node2 ) | ||
2326 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2327 : | sub add_to_newick_branch { | ||
2328 : | my ( $node1, $node2 ) = @_; | ||
2329 : | array_ref( $node1 ) || die "add_to_newick_branch: arg 1 not array ref\n"; | ||
2330 : | array_ref( $node2 ) || die "add_to_newick_branch: arg 2 not array ref\n"; | ||
2331 : | |||
2332 : | # Node structure template: | ||
2333 : | # my ( $DL, $L, $X, $C1, $C2, $C3, $C4, $C5 ) = @$node; | ||
2334 : | |||
2335 : | # Fix branch lengths for joining of two branches: | ||
2336 : | |||
2337 : | set_newick_x( $node1, newick_x( $node1 ) + newick_x( $node2 ) ); | ||
2338 : | |||
2339 : | # Merge branch length comments: | ||
2340 : | |||
2341 : | my $C41 = newick_c4( $node1 ); # Ref to node1 C4 | ||
2342 : | my $C42 = newick_c4( $node2 ); # Ref to node2 C4 | ||
2343 : | if ( $C41 && @$C41 ) { | ||
2344 : | if ( $C42 && @$C42 ) { unshift @$C41, @$C42 } # Add node2 comment | ||
2345 : | } | ||
2346 : | elsif ( $C42 && @$C42 ) { set_newick_c4( $node1, $C42 ) } # Or move node1 comment | ||
2347 : | |||
2348 : | my $C51 = newick_c5( $node1 ); # Ref to node1 C5 | ||
2349 : | my $C52 = newick_c5( $node2 ); # Ref to node2 C5 | ||
2350 : | if ( $C51 && @$C51 ) { | ||
2351 : | if ( $C52 && @$C52 ) { unshift @$C51, @$C52 } # Add node2 comment | ||
2352 : | } | ||
2353 : | elsif ( $C52 && @$C52 ) { set_newick_c5( $node1, $C52 ) } # Or move node1 comment | ||
2354 : | |||
2355 : | $node1; | ||
2356 : | } | ||
2357 : | |||
2358 : | |||
2359 : | #------------------------------------------------------------------------------- | ||
2360 : | golsen | 1.5 | # Collapse zero-length branches to make multifurcation. The original tree |
2361 : | # is modified. | ||
2362 : | # | ||
2363 : | # $tree = collapse_zero_length_branches( $tree ) | ||
2364 : | # $tree = collapse_zero_length_branches( $tree, $not_root ) | ||
2365 : | #------------------------------------------------------------------------------- | ||
2366 : | sub collapse_zero_length_branches { | ||
2367 : | my ( $tree, $not_root ) = @_; | ||
2368 : | array_ref( $tree ) || return undef; | ||
2369 : | |||
2370 : | my @desc = newick_desc_list( $tree ); | ||
2371 : | @desc or return ( $tree ); # Cannot collapse terminal branch | ||
2372 : | |||
2373 : | # Analyze descendants: | ||
2374 : | |||
2375 : | $not_root ||= 0; | ||
2376 : | my @new_desc = (); | ||
2377 : | my $changed = 0; | ||
2378 : | foreach ( @desc ) | ||
2379 : | { | ||
2380 : | my ( undef, @to_add ) = collapse_zero_length_branches( $_, $not_root+1 ); | ||
2381 : | if ( @to_add ) | ||
2382 : | { | ||
2383 : | push @new_desc, @to_add; | ||
2384 : | $changed = 1; | ||
2385 : | } | ||
2386 : | else | ||
2387 : | { | ||
2388 : | push @new_desc, $_; | ||
2389 : | } | ||
2390 : | } | ||
2391 : | set_newick_desc_ref( $tree, [ @new_desc ] ) if $changed; | ||
2392 : | |||
2393 : | # Collapse if not root, not tip and zero (or negative) branch: | ||
2394 : | |||
2395 : | my $collapse = $not_root && @new_desc && ( newick_x( $tree ) <= 0 ) ? 1 : 0; | ||
2396 : | ( $tree, ( $collapse ? @new_desc : () ) ); | ||
2397 : | } | ||
2398 : | |||
2399 : | golsen | 1.8 | #------------------------------------------------------------------------------- |
2400 : | # Add a subtree to a newick tree node: | ||
2401 : | # | ||
2402 : | # $node = newick_insert_at_node( $node, $subtree ) | ||
2403 : | #------------------------------------------------------------------------------- | ||
2404 : | sub newick_insert_at_node | ||
2405 : | { | ||
2406 : | my ( $node, $subtree ) = @_; | ||
2407 : | array_ref( $node ) && array_ref( $subtree ) or return undef; | ||
2408 : | |||
2409 : | # We could check validity of trees, but .... | ||
2410 : | |||
2411 : | my $dl = newick_desc_ref( $node ); | ||
2412 : | if ( array_ref( $dl ) ) | ||
2413 : | { | ||
2414 : | push @$dl, $subtree; | ||
2415 : | } | ||
2416 : | else | ||
2417 : | { | ||
2418 : | set_newick_desc_ref( $node, [ $subtree ] ); | ||
2419 : | } | ||
2420 : | return $node; | ||
2421 : | } | ||
2422 : | |||
2423 : | |||
2424 : | #------------------------------------------------------------------------------- | ||
2425 : | # Insert a subtree into a newick tree along the path between 2 nodes: | ||
2426 : | # | ||
2427 : | # $tree = newick_insert_between_nodes( $tree, $subtree, $node1, $node2, $fraction ) | ||
2428 : | #------------------------------------------------------------------------------- | ||
2429 : | sub newick_insert_between_nodes | ||
2430 : | { | ||
2431 : | my ( $tree, $subtree, $node1, $node2, $fraction ) = @_; | ||
2432 : | array_ref( $tree ) && array_ref( $subtree ) or return undef; | ||
2433 : | $fraction >= 0 && $fraction <= 1 or return undef; | ||
2434 : | |||
2435 : | # Find the paths to the nodes: | ||
2436 : | |||
2437 : | my @path1 = path_to_node( $tree, $node1 ) or return undef; | ||
2438 : | my @path2 = path_to_node( $tree, $node2 ) or return undef; | ||
2439 : | |||
2440 : | # Trim the common prefix: | ||
2441 : | |||
2442 : | while ( $path1[1] == $path2[1] ) | ||
2443 : | { | ||
2444 : | splice( @path1, 0, 2 ); | ||
2445 : | splice( @path2, 0, 2 ); | ||
2446 : | } | ||
2447 : | |||
2448 : | my ( @path, $dist ); | ||
2449 : | if ( @path1 < 3 ) | ||
2450 : | { | ||
2451 : | @path2 >= 3 or return undef; # node1 = node2 | ||
2452 : | golsen | 1.9 | $dist = $fraction * newick_path_length( @path2 ); |
2453 : | golsen | 1.8 | @path = @path2; |
2454 : | } | ||
2455 : | elsif ( @path2 < 3 ) | ||
2456 : | { | ||
2457 : | $dist = ( 1 - $fraction ) * newick_path_length( @path1 ); | ||
2458 : | @path = @path1; | ||
2459 : | } | ||
2460 : | else | ||
2461 : | { | ||
2462 : | my $dist1 = newick_path_length( @path1 ); | ||
2463 : | my $dist2 = newick_path_length( @path2 ); | ||
2464 : | $dist = $fraction * ( $dist1 + $dist2 ) - $dist1; | ||
2465 : | @path = ( $dist <= 0 ) ? @path1 : @path2; | ||
2466 : | $dist = abs( $dist ); | ||
2467 : | } | ||
2468 : | |||
2469 : | # Descend tree until we reach the insertion branch: | ||
2470 : | |||
2471 : | my $x; | ||
2472 : | while ( ( $dist > ( $x = newick_x( $path[2] ) ) ) && ( @path > 3 ) ) | ||
2473 : | { | ||
2474 : | $dist -= $x; | ||
2475 : | splice( @path, 0, 2 ); | ||
2476 : | } | ||
2477 : | |||
2478 : | # Insert the new node: | ||
2479 : | |||
2480 : | set_newick_desc_i( $path[0], $path[1], [ [ $path[2], $subtree ], undef, $dist ] ); | ||
2481 : | set_newick_x( $path[2], ( ( $x > $dist ) ? ( $x - $dist ) : 0 ) ); | ||
2482 : | |||
2483 : | return $tree; | ||
2484 : | } | ||
2485 : | |||
2486 : | golsen | 1.5 | |
2487 : | #------------------------------------------------------------------------------- | ||
2488 : | golsen | 1.1 | # Prune one or more tips from a tree: |
2489 : | # Caveat: if one tip is listed, the original tree is modified. | ||
2490 : | golsen | 1.5 | # if more than one tip is listed, a copy of the tree is returned |
2491 : | golsen | 1.1 | # (even if it is just listing the same tip twice!). |
2492 : | # | ||
2493 : | # $newtree = prune_from_newick( $tree, $tip ) | ||
2494 : | # $newtree = prune_from_newick( $tree, @tips ) | ||
2495 : | # $newtree = prune_from_newick( $tree, \@tips ) | ||
2496 : | #------------------------------------------------------------------------------- | ||
2497 : | sub prune_from_newick { | ||
2498 : | my ( $tr, @tips ) = @_; | ||
2499 : | if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } } | ||
2500 : | golsen | 1.9 | |
2501 : | golsen | 1.1 | if ( @tips == 0 ) { return $tr } |
2502 : | if ( @tips == 1 ) { return prune_1_from_newick( $tr, @tips ) } | ||
2503 : | |||
2504 : | my %del = map { ( $_, 1 ) } @tips; | ||
2505 : | my @keep = grep { ! $del{ $_ } } newick_tip_list( $tr ); | ||
2506 : | newick_subtree( $tr, @keep ); | ||
2507 : | } | ||
2508 : | |||
2509 : | |||
2510 : | #------------------------------------------------------------------------------- | ||
2511 : | # Prune a tip from a tree: | ||
2512 : | # | ||
2513 : | # $newtree = prune_1_from_newick( $tree, $tip ) | ||
2514 : | #------------------------------------------------------------------------------- | ||
2515 : | sub prune_1_from_newick { | ||
2516 : | my ( $tr, $tip ) = @_; | ||
2517 : | my @path = path_to_tip( $tr, $tip ); | ||
2518 : | if ( @path < 3 ) { return $tr } | ||
2519 : | |||
2520 : | my $node = $path[-1]; # Node with the tip | ||
2521 : | my $i1 = $path[-2]; # Descendant number of node in ancestor desc list | ||
2522 : | my $anc1 = $path[-3]; # Ancestor of node | ||
2523 : | my $nd1 = newick_n_desc( $anc1 ); # Number of descendants of ancestor | ||
2524 : | my $anc2 = ( @path >= 5 ) ? $path[-5] : undef; # Ancestor of anc1 | ||
2525 : | |||
2526 : | # dump_tree( $node ); | ||
2527 : | # print STDERR "i1 = $i1\n"; | ||
2528 : | # dump_tree( $anc1 ); | ||
2529 : | # print STDERR "nd1 = $nd1\n"; | ||
2530 : | # defined( $anc2 ) && dump_tree( $anc2 ); | ||
2531 : | |||
2532 : | if ( $nd1 > 3 || ( $anc2 && $nd1 > 2 ) ) { # Tip joins at multifurcation | ||
2533 : | splice( @{ $anc1->[0] }, $i1-1, 1 ); # delete the descendant | ||
2534 : | } | ||
2535 : | |||
2536 : | elsif ( $anc2 ) { # Tip joins at internal bifurcation | ||
2537 : | my $sis = newick_desc_i( $anc1, 3-$i1 ); # find sister node | ||
2538 : | add_to_newick_branch( $sis, $anc1 ); # combine internal branches | ||
2539 : | set_newick_desc_i( $anc2, $path[-4], $sis ); # remove $anc1 | ||
2540 : | } | ||
2541 : | |||
2542 : | elsif ( $nd1 == 2) { # Tip joins bifurcating root node | ||
2543 : | my $sis = newick_desc_i( $anc1, 3-$i1 ); # find sister node | ||
2544 : | $sis->[1] = $anc1->[1] if ! $sis->[1] && $anc1->[1]; # root label | ||
2545 : | $sis->[2] = undef; # root branch len | ||
2546 : | $sis->[3] = $anc1->[3] if ! $sis->[3] && $anc1->[3]; # tree comment | ||
2547 : | $sis->[4] = $anc1->[4] if ! $sis->[4] && $anc1->[4]; # desc list comment | ||
2548 : | golsen | 1.9 | $sis->[5] = $anc1->[5] if ! $sis->[5] && $anc1->[5]; # label comment |
2549 : | golsen | 1.1 | $sis->[6] = undef if $sis->[6]; # root branch comment |
2550 : | $sis->[7] = undef if $sis->[7]; # root branch comment | ||
2551 : | $tr = $sis; # sister is new root | ||
2552 : | } | ||
2553 : | |||
2554 : | elsif ( $nd1 == 3 ) { # Tip joins trifurcating root: | ||
2555 : | splice( @{ $anc1->[0] }, $i1-1, 1 ); # delete the descendant, and | ||
2556 : | $tr = uproot_newick( $tr ); # fix the rooting | ||
2557 : | } | ||
2558 : | |||
2559 : | else { | ||
2560 : | return undef; | ||
2561 : | } | ||
2562 : | |||
2563 : | return $tr; | ||
2564 : | } | ||
2565 : | |||
2566 : | |||
2567 : | #------------------------------------------------------------------------------- | ||
2568 : | golsen | 1.12 | # Produce a potentially rooted subtree with the desired tips: |
2569 : | # | ||
2570 : | # Except for (some) tip nodes, the tree produced is a copy. | ||
2571 : | # There is no check that requested tips exist. | ||
2572 : | # | ||
2573 : | # $newtree = rooted_newick_subtree( $tree, @tips ) | ||
2574 : | # $newtree = rooted_newick_subtree( $tree, \@tips ) | ||
2575 : | #------------------------------------------------------------------------------- | ||
2576 : | sub rooted_newick_subtree { | ||
2577 : | my ( $tr, @tips ) = @_; | ||
2578 : | if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } } | ||
2579 : | |||
2580 : | if ( @tips < 2 ) { return undef } | ||
2581 : | my $keephash = { map { ( $_, 1 ) } @tips }; | ||
2582 : | my $tr2 = subtree1( $tr, $keephash ); | ||
2583 : | $tr2->[2] = undef if $tr2; # undef root branch length | ||
2584 : | $tr2; | ||
2585 : | } | ||
2586 : | |||
2587 : | |||
2588 : | #------------------------------------------------------------------------------- | ||
2589 : | golsen | 1.1 | # Produce a subtree with the desired tips: |
2590 : | # | ||
2591 : | # Except for (some) tip nodes, the tree produced is a copy. | ||
2592 : | # There is no check that requested tips exist. | ||
2593 : | # | ||
2594 : | # $newtree = newick_subtree( $tree, @tips ) | ||
2595 : | # $newtree = newick_subtree( $tree, \@tips ) | ||
2596 : | #------------------------------------------------------------------------------- | ||
2597 : | sub newick_subtree { | ||
2598 : | my ( $tr, @tips ) = @_; | ||
2599 : | if ( @tips == 1 && ref( $tips[0] ) eq "ARRAY" ) { @tips = @{ $tips[0] } } | ||
2600 : | |||
2601 : | if ( @tips < 2 ) { return undef } | ||
2602 : | my $was_rooted = newick_is_rooted( $tr ); | ||
2603 : | my $keephash = { map { ( $_, 1 ) } @tips }; | ||
2604 : | my $tr2 = subtree1( $tr, $keephash ); | ||
2605 : | $tr2 = uproot_newick( $tr2 ) if ! $was_rooted && newick_is_rooted( $tr2 ); | ||
2606 : | $tr2->[2] = undef if $tr2; # undef root branch length | ||
2607 : | $tr2; | ||
2608 : | } | ||
2609 : | |||
2610 : | |||
2611 : | sub subtree1 { | ||
2612 : | my ( $tr, $keep ) = @_; | ||
2613 : | my @desc1 = newick_desc_list( $tr ); | ||
2614 : | |||
2615 : | # Is this a tip, and is it in the keep list? | ||
2616 : | |||
2617 : | if ( @desc1 < 1 ) { | ||
2618 : | return ( $keep->{ newick_lbl( $tr ) } ) ? $tr : undef; | ||
2619 : | } | ||
2620 : | |||
2621 : | # Internal node: analyze the descendants: | ||
2622 : | |||
2623 : | my @desc2 = (); | ||
2624 : | foreach ( @desc1 ) { | ||
2625 : | my $desc = subtree1( $_, $keep ); | ||
2626 : | if ( $desc && @$desc ) { push @desc2, $desc } | ||
2627 : | } | ||
2628 : | |||
2629 : | if ( @desc2 == 0 ) { return undef } | ||
2630 : | if ( @desc2 > 1 ) { return [ \@desc2, @$tr[ 1 .. @$tr - 1 ] ] } | ||
2631 : | |||
2632 : | # Exactly 1 descendant | ||
2633 : | |||
2634 : | my $desc = $desc2[ 0 ]; | ||
2635 : | my @nn = ( $desc->[0], | ||
2636 : | $desc->[1] ? $desc->[1] : $tr->[1], | ||
2637 : | defined( $tr->[2] ) ? $desc->[2] + $tr->[2] : undef | ||
2638 : | ); | ||
2639 : | |||
2640 : | # Merge comments (only recreating the ones that existed): | ||
2641 : | |||
2642 : | if ( $tr->[3] && @{$tr->[3]} || $desc->[3] && @{$desc->[3]} ) { | ||
2643 : | $nn[3] = [ $tr->[3] ? @{$tr->[3]} : (), $desc->[3] ? @{$desc->[3]} : () ]; | ||
2644 : | } | ||
2645 : | if ( $tr->[4] && @{$tr->[4]} || $desc->[4] && @{$desc->[4]} ) { | ||
2646 : | $nn[4] = [ $tr->[4] ? @{$tr->[4]} : (), $desc->[4] ? @{$desc->[4]} : () ]; | ||
2647 : | } | ||
2648 : | if ( $tr->[5] && @{$tr->[5]} || $desc->[5] && @{$desc->[5]} ) { | ||
2649 : | $nn[5] = [ $tr->[5] ? @{$tr->[5]} : (), $desc->[5] ? @{$desc->[5]} : () ]; | ||
2650 : | } | ||
2651 : | if ( $tr->[6] && @{$tr->[6]} || $desc->[6] && @{$desc->[6]} ) { | ||
2652 : | $nn[6] = [ $tr->[6] ? @{$tr->[6]} : (), $desc->[6] ? @{$desc->[6]} : () ]; | ||
2653 : | } | ||
2654 : | if ( $tr->[7] && @{$tr->[7]} || $desc->[7] && @{$desc->[7]} ) { | ||
2655 : | $nn[7] = [ $tr->[7] ? @{$tr->[7]} : (), $desc->[7] ? @{$desc->[7]} : () ]; | ||
2656 : | } | ||
2657 : | |||
2658 : | return \@nn; | ||
2659 : | } | ||
2660 : | |||
2661 : | |||
2662 : | golsen | 1.12 | #------------------------------------------------------------------------------- |
2663 : | # The smallest subtree of rooted tree that includes @tips: | ||
2664 : | # | ||
2665 : | # $node = newick_covering_subtree( $tree, @tips ) | ||
2666 : | # $node = newick_covering_subtree( $tree, \@tips ) | ||
2667 : | #------------------------------------------------------------------------------- | ||
2668 : | |||
2669 : | sub newick_covering_subtree { | ||
2670 : | my $tree = shift; | ||
2671 : | my %tips = map { $_ => 1 } ( ( ref( $_[0] ) eq 'ARRAY' ) ? @{ $_[0] } : @_ ); | ||
2672 : | |||
2673 : | # Return smallest covering node, if any: | ||
2674 : | |||
2675 : | ( newick_covering_subtree( $tree, \%tips ) )[ 0 ]; | ||
2676 : | } | ||
2677 : | |||
2678 : | |||
2679 : | sub newick_covering_subtree_1 { | ||
2680 : | my ( $node, $tips ) = @_; | ||
2681 : | my $n_cover = 0; | ||
2682 : | my @desc = newick_desc_list( $node ); | ||
2683 : | if ( @desc ) | ||
2684 : | { | ||
2685 : | foreach ( @desc ) | ||
2686 : | { | ||
2687 : | my ( $subtree, $n ) = newick_covering_subtree_1( $_, $tips ); | ||
2688 : | return ( $subtree, $n ) if $subtree; | ||
2689 : | $n_cover += $n; | ||
2690 : | } | ||
2691 : | } | ||
2692 : | elsif ( $tips->{ newick_lbl( $node ) } ) | ||
2693 : | { | ||
2694 : | $n_cover++; | ||
2695 : | } | ||
2696 : | |||
2697 : | # If all tips are covered, return node | ||
2698 : | |||
2699 : | ( $n_cover == keys %$tips ) ? ( $node, $n_cover ) : ( undef, $n_cover ); | ||
2700 : | } | ||
2701 : | |||
2702 : | |||
2703 : | golsen | 1.1 | #=============================================================================== |
2704 : | # | ||
2705 : | golsen | 1.9 | # Representative subtrees |
2706 : | # | ||
2707 : | #=============================================================================== | ||
2708 : | # Find subtree of size n representating vicinity of the root: | ||
2709 : | # | ||
2710 : | # $subtree = root_neighborhood_representative_tree( $tree, $n, \%tip_priority ) | ||
2711 : | # $subtree = root_neighborhood_representative_tree( $tree, $n ) | ||
2712 : | # | ||
2713 : | # Note that if $tree is rooted, then the subtree will also be. This can have | ||
2714 : | # consequences on downstream programs. | ||
2715 : | #------------------------------------------------------------------------------- | ||
2716 : | sub root_neighborhood_representative_tree | ||
2717 : | { | ||
2718 : | my ( $tree, $n, $tip_priority ) = @_; | ||
2719 : | array_ref( $tree ) && ( $n >= 2 ) or return undef; | ||
2720 : | if ( newick_tip_count( $tree ) <= $n ) { return $tree } | ||
2721 : | |||
2722 : | $tip_priority ||= default_tip_priority( $tree ); | ||
2723 : | my @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) } | ||
2724 : | root_proximal_newick_subtrees( $tree, $n ); | ||
2725 : | |||
2726 : | newick_subtree( copy_newick_tree( $tree ), \@tips ); | ||
2727 : | } | ||
2728 : | |||
2729 : | |||
2730 : | #------------------------------------------------------------------------------- | ||
2731 : | # Find n tips to represent tree lineages in vicinity of another tip. | ||
2732 : | # Default tip priority is short total branch length. | ||
2733 : | # | ||
2734 : | # \@tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority ) | ||
2735 : | # @tips = root_neighborhood_representative_tips( $tree, $n, \%tip_priority ) | ||
2736 : | # \@tips = root_neighborhood_representative_tips( $tree, $n ) | ||
2737 : | # @tips = root_neighborhood_representative_tips( $tree, $n ) | ||
2738 : | #------------------------------------------------------------------------------- | ||
2739 : | sub root_neighborhood_representative_tips | ||
2740 : | { | ||
2741 : | my ( $tree, $n, $tip_priority ) = @_; | ||
2742 : | array_ref( $tree ) && ( $n >= 2 ) or return undef; | ||
2743 : | |||
2744 : | my @tips; | ||
2745 : | if ( newick_tip_count( $tree ) <= $n ) | ||
2746 : | { | ||
2747 : | @tips = newick_tip_list( $tree ); | ||
2748 : | } | ||
2749 : | else | ||
2750 : | { | ||
2751 : | $tip_priority ||= default_tip_priority( $tree ); | ||
2752 : | @tips = map { representative_tip_of_newick_node( $_, $tip_priority ) } | ||
2753 : | root_proximal_newick_subtrees( $tree, $n ); | ||
2754 : | } | ||
2755 : | |||
2756 : | wantarray ? @tips : \@tips; | ||
2757 : | } | ||
2758 : | |||
2759 : | |||
2760 : | #------------------------------------------------------------------------------- | ||
2761 : | # Find subtree of size n representating vicinity of a tip: | ||
2762 : | # | ||
2763 : | # $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n, \%tip_priority ) | ||
2764 : | # $subtree = tip_neighborhood_representative_tree( $tree, $tip, $n ) | ||
2765 : | #------------------------------------------------------------------------------- | ||
2766 : | sub tip_neighborhood_representative_tree | ||
2767 : | { | ||
2768 : | my ( $tree, $tip, $n, $tip_priority ) = @_; | ||
2769 : | array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef; | ||
2770 : | newick_tip_in_tree( $tree, $tip ) or return undef; | ||
2771 : | |||
2772 : | my $tree1 = copy_newick_tree( $tree ); | ||
2773 : | if ( newick_tip_count( $tree1 ) - 1 <= $n ) | ||
2774 : | { | ||
2775 : | return prune_from_newick( $tree1, $tip ) | ||
2776 : | } | ||
2777 : | |||
2778 : | $tree1 = reroot_newick_to_tip( $tree1, $tip ); | ||
2779 : | $tree1 = newick_desc_i( $tree1, 1 ); # Node immediately below tip | ||
2780 : | my @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority ); | ||
2781 : | newick_subtree( copy_newick_tree( $tree ), \@tips ); | ||
2782 : | } | ||
2783 : | |||
2784 : | |||
2785 : | #------------------------------------------------------------------------------- | ||
2786 : | # Find n tips to represent tree lineages in vicinity of another tip. | ||
2787 : | # Default tip priority is short total branch length. | ||
2788 : | # | ||
2789 : | # \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority ) | ||
2790 : | # @tips = tip_neighborhood_representative_tips( $tree, $tip, $n, \%tip_priority ) | ||
2791 : | # \@tips = tip_neighborhood_representative_tips( $tree, $tip, $n ) | ||
2792 : | # @tips = tip_neighborhood_representative_tips( $tree, $tip, $n ) | ||
2793 : | #------------------------------------------------------------------------------- | ||
2794 : | sub tip_neighborhood_representative_tips | ||
2795 : | { | ||
2796 : | my ( $tree, $tip, $n, $tip_priority ) = @_; | ||
2797 : | array_ref( $tree ) && $tip && ( $n >= 2 ) or return undef; | ||
2798 : | newick_tip_in_tree( $tree, $tip ) or return undef; | ||
2799 : | |||
2800 : | my @tips = newick_tip_list( $tree ); | ||
2801 : | if ( newick_tip_count( $tree ) - 1 <= $n ) | ||
2802 : | { | ||
2803 : | @tips = grep { $_ ne $tip } @tips; | ||
2804 : | } | ||
2805 : | else | ||
2806 : | { | ||
2807 : | my $tree1 = copy_newick_tree( $tree ); | ||
2808 : | $tree1 = reroot_newick_to_tip( $tree1, $tip ); | ||
2809 : | $tree1 = newick_desc_i( $tree1, 1 ); # Node immediately below tip | ||
2810 : | @tips = root_neighborhood_representative_tips( $tree1, $n, $tip_priority ); | ||
2811 : | } | ||
2812 : | |||
2813 : | wantarray ? @tips : \@tips; | ||
2814 : | } | ||
2815 : | |||
2816 : | |||
2817 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2818 : | # Anonymous hash of the negative distance from root to each tip: | ||
2819 : | # | ||
2820 : | # \%tip_priority = default_tip_priority( $tree ) | ||
2821 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2822 : | sub default_tip_priority | ||
2823 : | { | ||
2824 : | my ( $tree ) = @_; | ||
2825 : | my $tip_distances = newick_tip_distances( $tree ) || {}; | ||
2826 : | return { map { $_ => -$tip_distances->{$_} } keys %$tip_distances }; | ||
2827 : | } | ||
2828 : | |||
2829 : | |||
2830 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2831 : | # Select a tip from a subtree base on a priority value: | ||
2832 : | # | ||
2833 : | # $tip = representative_tip_of_newick_node( $node, \%tip_priority ) | ||
2834 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2835 : | sub representative_tip_of_newick_node | ||
2836 : | { | ||
2837 : | my ( $node, $tip_priority ) = @_; | ||
2838 : | my ( $tip ) = sort { $b->[1] <=> $a->[1] } # The best | ||
2839 : | map { [ $_, $tip_priority->{ $_ } ] } | ||
2840 : | newick_tip_list( $node ); | ||
2841 : | $tip->[0]; # Label from label-priority pair | ||
2842 : | } | ||
2843 : | |||
2844 : | |||
2845 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2846 : | # Find n subtrees focused around the root of a tree. Typically each will | ||
2847 : | # then be reduced to a single tip to make a representative tree: | ||
2848 : | # | ||
2849 : | # @subtrees = root_proximal_newick_subtrees( $tree, $n ) | ||
2850 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2851 : | sub root_proximal_newick_subtrees | ||
2852 : | { | ||
2853 : | my ( $tree, $n ) = @_; | ||
2854 : | my $node_start_end = newick_branch_intervals( $tree ); | ||
2855 : | n_representative_branches( $n, $node_start_end ); | ||
2856 : | } | ||
2857 : | |||
2858 : | |||
2859 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2860 : | # @node_start_end = newick_branch_intervals( $tree ) | ||
2861 : | # \@node_start_end = newick_branch_intervals( $tree ) | ||
2862 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2863 : | sub newick_branch_intervals | ||
2864 : | { | ||
2865 : | my ( $node, $parent_x ) = @_; | ||
2866 : | $parent_x ||= 0; | ||
2867 : | my ( $desc, undef, $dx ) = @$node; | ||
2868 : | my $x = $parent_x + $dx; | ||
2869 : | my $interval = [ $node, $parent_x, $desc && @$desc ? $x : 1e100 ]; | ||
2870 : | my @intervals = ( $interval, | ||
2871 : | map { &newick_branch_intervals( $_, $x ) } @$desc | ||
2872 : | ); | ||
2873 : | return wantarray ? @intervals : \@intervals; | ||
2874 : | } | ||
2875 : | |||
2876 : | |||
2877 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2878 : | # @ids = n_representative_branches( $n, @id_start_end ) | ||
2879 : | # @ids = n_representative_branches( $n, \@id_start_end ) | ||
2880 : | # \@ids = n_representative_branches( $n, @id_start_end ) | ||
2881 : | # \@ids = n_representative_branches( $n, \@id_start_end ) | ||
2882 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2883 : | sub n_representative_branches | ||
2884 : | { | ||
2885 : | my $n = shift; | ||
2886 : | # Sort intervals by start point: | ||
2887 : | my @unprocessed = sort { $a->[1] <=> $b->[1] } | ||
2888 : | ( @_ == 1 ) ? @{ $_[0] } : @_; | ||
2889 : | my @active = (); | ||
2890 : | my ( $interval, $current_point ); | ||
2891 : | foreach $interval ( @unprocessed ) | ||
2892 : | { | ||
2893 : | $current_point = $interval->[1]; | ||
2894 : | # Filter out intervals that have ended. This is N**2 in the number | ||
2895 : | # of representatives. Fixing this would require maintaining a sorted | ||
2896 : | # active list. | ||
2897 : | @active = grep { $_->[2] > $current_point } @active; | ||
2898 : | push @active, $interval; | ||
2899 : | last if ( @active >= $n ); | ||
2900 : | } | ||
2901 : | |||
2902 : | my @ids = map { $_->[0] } @active; | ||
2903 : | return wantarray() ? @ids : \@ids; | ||
2904 : | } | ||
2905 : | |||
2906 : | |||
2907 : | #=============================================================================== | ||
2908 : | # | ||
2909 : | golsen | 1.1 | # Tree writing and reading |
2910 : | # | ||
2911 : | #=============================================================================== | ||
2912 : | overbeek | 1.7 | # writeNewickTree( $tree ) |
2913 : | # writeNewickTree( $tree, $file ) | ||
2914 : | # writeNewickTree( $tree, \*FH ) | ||
2915 : | golsen | 1.1 | #------------------------------------------------------------------------------- |
2916 : | sub writeNewickTree { | ||
2917 : | my ( $tree, $file ) = @_; | ||
2918 : | overbeek | 1.7 | my ( $fh, $close ) = open_output( $file ); |
2919 : | $fh or return; | ||
2920 : | print $fh ( strNewickTree( $tree ), "\n" ); | ||
2921 : | close $fh if $close; | ||
2922 : | golsen | 1.1 | } |
2923 : | |||
2924 : | |||
2925 : | #------------------------------------------------------------------------------- | ||
2926 : | # fwriteNewickTree( $file, $tree ) # Args reversed to writeNewickTree | ||
2927 : | #------------------------------------------------------------------------------- | ||
2928 : | sub fwriteNewickTree { writeNewickTree( $_[1], $_[0] ) } | ||
2929 : | |||
2930 : | |||
2931 : | #------------------------------------------------------------------------------- | ||
2932 : | # $treestring = strNewickTree( $tree ) | ||
2933 : | #------------------------------------------------------------------------------- | ||
2934 : | sub strNewickTree { | ||
2935 : | my $node = shift @_; | ||
2936 : | strNewickSubtree( $node, "" ) . ";"; | ||
2937 : | } | ||
2938 : | |||
2939 : | |||
2940 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2941 : | # $string = strNewickSubtree( $node, $prefix ) | ||
2942 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2943 : | sub strNewickSubtree { | ||
2944 : | my ( $node, $prefix ) = @_; | ||
2945 : | my $s; | ||
2946 : | |||
2947 : | $s = strNewickComments( newick_c1( $node ), $prefix ); | ||
2948 : | if ( $s ) { $prefix = " " } | ||
2949 : | |||
2950 : | my $ndesc; | ||
2951 : | if ( $ndesc = newick_n_desc( $node ) ) { | ||
2952 : | for (my $d = 1; $d <= $ndesc; $d++) { | ||
2953 : | $s .= ( ( $d == 1 ) ? $prefix . "(" : "," ) | ||
2954 : | . strNewickSubtree( newick_desc_i( $node, $d ), " " ); | ||
2955 : | } | ||
2956 : | |||
2957 : | $s .= ")" . strNewickComments( newick_c2( $node ), " " ); | ||
2958 : | $prefix = " "; | ||
2959 : | } | ||
2960 : | |||
2961 : | if ( defined( newick_lbl( $node ) ) && newick_lbl( $node ) ) { | ||
2962 : | $s .= $prefix | ||
2963 : | . q_newick_lbl( $node ) | ||
2964 : | . strNewickComments( newick_c3( $node ), " " ); | ||
2965 : | } | ||
2966 : | |||
2967 : | if ( defined( newick_x( $node ) ) ) { | ||
2968 : | $s .= ":" | ||
2969 : | . strNewickComments( newick_c4( $node ), " " ) | ||
2970 : | . sprintf( " %.6f", newick_x( $node ) ) | ||
2971 : | . strNewickComments( newick_c5( $node ), " " ); | ||
2972 : | } | ||
2973 : | |||
2974 : | $s; | ||
2975 : | } | ||
2976 : | |||
2977 : | |||
2978 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2979 : | # $string = strNewickComments( $clist, $prefix ) | ||
2980 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2981 : | sub strNewickComments { | ||
2982 : | my ( $clist, $prefix ) = @_; | ||
2983 : | array_ref( $clist ) && ( @$clist > 0 ) || return ""; | ||
2984 : | $prefix . "[" . join( "] [", @$clist ) . "]"; | ||
2985 : | } | ||
2986 : | |||
2987 : | |||
2988 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2989 : | # $quoted_label = q_newick_lbl( $label ) | ||
2990 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
2991 : | sub q_newick_lbl { | ||
2992 : | my $lbl = newick_lbl( $_[0] ); | ||
2993 : | defined( $lbl ) && ( $lbl ne "" ) || return undef; | ||
2994 : | |||
2995 : | if ( $lbl =~ m/^[^][()_:;,]+$/ # Anything but []()_:;, | ||
2996 : | && $lbl !~ m/^'/ ) { # and does not start with ' | ||
2997 : | $lbl =~ s/ /_/g; # Recode blanks as _ | ||
2998 : | return $lbl; | ||
2999 : | } | ||
3000 : | |||
3001 : | else { | ||
3002 : | $lbl =~ s/'/''/g; # Double existing single quote marks | ||
3003 : | return q(') . $lbl . q('); # Wrap in single quote marks | ||
3004 : | } | ||
3005 : | } | ||
3006 : | |||
3007 : | |||
3008 : | #=============================================================================== | ||
3009 : | # $treestring = formatNewickTree( $tree ) | ||
3010 : | #=============================================================================== | ||
3011 : | sub formatNewickTree { | ||
3012 : | formatNewickSubtree( $_[0], "", "" ) . ";"; | ||
3013 : | } | ||
3014 : | |||
3015 : | |||
3016 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3017 : | # $string = formatNewickSubtree( $node, $prefix, $indent ) | ||
3018 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3019 : | sub formatNewickSubtree { | ||
3020 : | my ( $node, $prefix, $indent ) = @_; | ||
3021 : | my $s; | ||
3022 : | |||
3023 : | $s = formatNewickComments( newick_c1( $node ), $prefix, $indent ); | ||
3024 : | if ( $s ) { $prefix = "\n$indent" } | ||
3025 : | |||
3026 : | if ( my $ndesc = newick_n_desc( $node ) ) { | ||
3027 : | for (my $d = 1; $d <= $ndesc; $d++) { | ||
3028 : | $s .= ( ( $d == 1 ) ? $prefix . "(" : ",\n$indent " ) | ||
3029 : | . formatNewickSubtree( newick_desc_i( $node, $d ), " ", $indent . " " ); | ||
3030 : | } | ||
3031 : | |||
3032 : | $s .= "\n$indent)" . formatNewickComments( newick_c2( $node ), " ", $indent ); | ||
3033 : | $prefix = " "; | ||
3034 : | } | ||
3035 : | |||
3036 : | if ( defined( newick_lbl( $node ) ) && newick_lbl( $node ) ) { | ||
3037 : | $s .= $prefix | ||
3038 : | . q_newick_lbl( $node ) | ||
3039 : | . formatNewickComments( newick_c3( $node ), " ", $indent ); | ||
3040 : | } | ||
3041 : | |||
3042 : | if ( defined( newick_x( $node ) ) ) { | ||
3043 : | $s .= ":" | ||
3044 : | . formatNewickComments( newick_c4( $node ), " ", $indent ) | ||
3045 : | . sprintf(" %.6f", newick_x( $node ) ) | ||
3046 : | . formatNewickComments( newick_c5( $node ), " ", $indent ); | ||
3047 : | } | ||
3048 : | |||
3049 : | $s; | ||
3050 : | } | ||
3051 : | |||
3052 : | |||
3053 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3054 : | # $string = formatNewickComments( $clist, $prefix, $indent ) | ||
3055 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3056 : | sub formatNewickComments { | ||
3057 : | my ( $clist, $prefix, $indent ) = @_; | ||
3058 : | array_ref( $clist ) && @$clist || return ""; | ||
3059 : | $prefix . "[" . join( "] [", @$clist ) . "]"; | ||
3060 : | } | ||
3061 : | |||
3062 : | |||
3063 : | #=============================================================================== | ||
3064 : | overbeek | 1.7 | # $tree = read_newick_tree( $file ) # reads to a semicolon |
3065 : | # @trees = read_newick_trees( $file ) # reads to end of file | ||
3066 : | #=============================================================================== | ||
3067 : | |||
3068 : | sub read_newick_tree | ||
3069 : | { | ||
3070 : | my $file = shift; | ||
3071 : | my ( $fh, $close ) = open_input( $file ); | ||
3072 : | my $tree; | ||
3073 : | my @lines = (); | ||
3074 : | golsen | 1.9 | foreach ( <$fh> ) |
3075 : | overbeek | 1.7 | { |
3076 : | chomp; | ||
3077 : | push @lines, $_; | ||
3078 : | if ( /;/ ) | ||
3079 : | { | ||
3080 : | $tree = parse_newick_tree_str( join( ' ', @lines ) ); | ||
3081 : | last; | ||
3082 : | } | ||
3083 : | } | ||
3084 : | close $fh if $close; | ||
3085 : | |||
3086 : | $tree; | ||
3087 : | } | ||
3088 : | |||
3089 : | |||
3090 : | sub read_newick_trees | ||
3091 : | { | ||
3092 : | my $file = shift; | ||
3093 : | my ( $fh, $close ) = open_input( $file ); | ||
3094 : | my @trees = (); | ||
3095 : | my @lines = (); | ||
3096 : | golsen | 1.9 | foreach ( <$fh> ) |
3097 : | overbeek | 1.7 | { |
3098 : | chomp; | ||
3099 : | push @lines, $_; | ||
3100 : | if ( /;/ ) | ||
3101 : | { | ||
3102 : | push @trees, parse_newick_tree_str( join( ' ', @lines ) ); | ||
3103 : | @lines = () | ||
3104 : | } | ||
3105 : | } | ||
3106 : | close $fh if $close; | ||
3107 : | |||
3108 : | @trees; | ||
3109 : | } | ||
3110 : | |||
3111 : | |||
3112 : | #=============================================================================== | ||
3113 : | golsen | 1.1 | # Tree reader adapted from the C language reader in fastDNAml |
3114 : | # | ||
3115 : | # $tree = parse_newick_tree_str( $string ) | ||
3116 : | #=============================================================================== | ||
3117 : | sub parse_newick_tree_str { | ||
3118 : | my $s = shift @_; | ||
3119 : | |||
3120 : | my ( $ind, $rootnode ) = parse_newick_subtree( $s, 0 ); | ||
3121 : | if ( substr( $s, $ind, 1 ) ne ";") { warn "warning: tree missing ';'\n" } | ||
3122 : | $rootnode; | ||
3123 : | } | ||
3124 : | |||
3125 : | |||
3126 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3127 : | # Read a subtrees recursively (everything of tree but a semicolon) | ||
3128 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3129 : | sub parse_newick_subtree { | ||
3130 : | my ( $s, $ind ) = @_; | ||
3131 : | |||
3132 : | my $newnode = []; | ||
3133 : | my @dlist = (); | ||
3134 : | my ( $lbl, $x, $c1, $c2, $c3, $c4, $c5 ); | ||
3135 : | golsen | 1.9 | |
3136 : | golsen | 1.1 | ( $ind, $c1 ) = getNextTreeChar( $s, $ind ); # Comment 1 |
3137 : | if ( ! defined( $ind ) ) { treeParseError( "missing subtree" ) } | ||
3138 : | if ( $c1 && @$c1 ) { set_newick_c1( $newnode, $c1 ) } | ||
3139 : | |||
3140 : | if ( substr( $s, $ind, 1 ) eq "(" ) { # New internal node | ||
3141 : | while ( ! @dlist || ( substr( $s, $ind, 1 ) eq "," ) ) { | ||
3142 : | my $desc; | ||
3143 : | ( $ind, $desc ) = parse_newick_subtree( $s, $ind+1 ); | ||
3144 : | if (! $ind) { return () } | ||
3145 : | push @dlist, $desc; | ||
3146 : | } | ||
3147 : | if ( substr( $s, $ind, 1 ) ne ")" ) { treeParseError( "missing ')'" ) } | ||
3148 : | |||
3149 : | ( $ind, $c2 ) = getNextTreeChar( $s, $ind+1 ); # Comment 2 | ||
3150 : | if ( $c2 && @$c2 ) { set_newick_c2( $newnode, $c2 ) } | ||
3151 : | ( $ind, $lbl ) = parseTreeNodeLabel( $s, $ind ); # Node label | ||
3152 : | } | ||
3153 : | |||
3154 : | elsif ( substr( $s, $ind, 1 ) =~ /[^][(,):;]/ ) { # New tip | ||
3155 : | ( $ind, $lbl ) = parseTreeNodeLabel( $s, $ind ); # Tip label | ||
3156 : | if (! $ind) { return () } | ||
3157 : | } | ||
3158 : | |||
3159 : | @dlist || $lbl || treeParseError( "no descendant list or label" ); | ||
3160 : | |||
3161 : | if ( @dlist ) { set_newick_desc_ref( $newnode, \@dlist ) } | ||
3162 : | if ( $lbl ) { set_newick_lbl( $newnode, $lbl ) } | ||
3163 : | |||
3164 : | ( $ind, $c3 ) = getNextTreeChar( $s, $ind ); # Comment 3 | ||
3165 : | if ( $c3 && @$c3 ) { set_newick_c3( $newnode, $c3 ) } | ||
3166 : | |||
3167 : | if (substr( $s, $ind, 1 ) eq ":") { # Branch length | ||
3168 : | ( $ind, $c4 ) = getNextTreeChar( $s, $ind+1 ); # Comment 4 | ||
3169 : | if ( $c4 && @$c4 ) { set_newick_c4( $newnode, $c4 ) } | ||
3170 : | ( $ind, $x ) = parseBranchLength( $s, $ind ); | ||
3171 : | if ( defined( $x ) ) { set_newick_x( $newnode, $x ) } | ||
3172 : | ( $ind, $c5 ) = getNextTreeChar( $s, $ind ); # Comment 5 | ||
3173 : | if ( $c5 && @$c5 ) { set_newick_c5( $newnode, $c5 ) } | ||
3174 : | } | ||
3175 : | |||
3176 : | ( $ind, $newnode ); | ||
3177 : | } | ||
3178 : | |||
3179 : | |||
3180 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3181 : | # Read a Newick tree label | ||
3182 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3183 : | sub parseTreeNodeLabel { # Empty string is permitted | ||
3184 : | my ( $s, $ind ) = @_; | ||
3185 : | my ( $lbl, $c ); | ||
3186 : | |||
3187 : | if ( substr( $s, $ind, 1 ) eq "'") { | ||
3188 : | my $ind1 = ++$ind; | ||
3189 : | |||
3190 : | while ( ) { | ||
3191 : | if ( ! defined( $c = substr( $s, $ind, 1 ) ) || $c eq "" ) { | ||
3192 : | treeParseError( "missing close quote on label '" . substr( $s, $ind1 ) . "'" ) | ||
3193 : | } | ||
3194 : | elsif ( $c ne "'" ) { $ind++ } | ||
3195 : | elsif ( substr( $s, $ind, 2 ) eq "''" ) { $ind += 2 } | ||
3196 : | else { last } | ||
3197 : | } | ||
3198 : | |||
3199 : | $lbl = substr( $s, $ind1, $ind-$ind1 ); | ||
3200 : | $lbl =~ s/''/'/g; | ||
3201 : | $ind++; | ||
3202 : | } | ||
3203 : | |||
3204 : | else { | ||
3205 : | my $ind1 = $ind; | ||
3206 : | while ( defined( $c = substr($s, $ind, 1) ) && $c ne "" && $c !~ /[][\s(,):;]/ ) { $ind++ } | ||
3207 : | $lbl = substr( $s, $ind1, $ind-$ind1 ); | ||
3208 : | $lbl =~ s/_/ /g; | ||
3209 : | } | ||
3210 : | |||
3211 : | ( $ind, $lbl ); | ||
3212 : | } | ||
3213 : | |||
3214 : | |||
3215 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3216 : | # Read a Newick tree branch length | ||
3217 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3218 : | sub parseBranchLength { | ||
3219 : | my ( $s, $ind ) = @_; | ||
3220 : | |||
3221 : | my $c = substr( $s, $ind, 1 ); | ||
3222 : | |||
3223 : | my $sign = ( $c ne "-" ) ? 1 : -1; # Sign | ||
3224 : | if ( $c =~ /[-+]/ ) { $c = substr( $s, ++$ind, 1 ) } | ||
3225 : | |||
3226 : | if ( $c !~ /^[.0-9]$/ ) { # Allows starting with decimal | ||
3227 : | treeParseError( "invalid branch length character '$c'" ) | ||
3228 : | } | ||
3229 : | |||
3230 : | my $v = 0; | ||
3231 : | while ( $c =~ /[0-9]/ ) { # Whole number | ||
3232 : | $v = 10 * $v + $c; | ||
3233 : | $c = substr( $s, ++$ind, 1 ); | ||
3234 : | } | ||
3235 : | |||
3236 : | if ( $c eq "." ) { # Fraction | ||
3237 : | my $f = 0.1; | ||
3238 : | $c = substr( $s, ++$ind, 1 ); | ||
3239 : | while ( $c =~ /[0-9]/ ) { | ||
3240 : | $v += $f * $c; | ||
3241 : | $f *= 0.1; | ||
3242 : | $c = substr( $s, ++$ind, 1 ); | ||
3243 : | } | ||
3244 : | } | ||
3245 : | |||
3246 : | $v *= $sign; | ||
3247 : | |||
3248 : | if ( $c =~ /[dDeEgG]/ ) { # Exponent | ||
3249 : | $c = substr( $s, ++$ind, 1 ); | ||
3250 : | my $esign = ( $c ne "-" ) ? 1 : -1; | ||
3251 : | if ( $c =~ /^[-+]$/ ) { $c = substr( $s, ++$ind, 1 ) } | ||
3252 : | if ( $c !~ /^[0-9]$/ ) { | ||
3253 : | treeParseError( "missing branch length exponent '$c'" ) | ||
3254 : | } | ||
3255 : | |||
3256 : | my $e = 0; | ||
3257 : | while ( $c =~ /[0-9]/ ) { | ||
3258 : | $e = 10 * $e + $c; | ||
3259 : | $c = substr( $s, ++$ind, 1 ); | ||
3260 : | } | ||
3261 : | $e *= $esign; | ||
3262 : | $v *= 10**$e; | ||
3263 : | } | ||
3264 : | |||
3265 : | ( $ind, $v ); | ||
3266 : | } | ||
3267 : | |||
3268 : | |||
3269 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3270 : | # ( $index, /@commentlist ) = getNextTreeChar( $string, $index ) | ||
3271 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3272 : | sub getNextTreeChar { # Move to next nonblank, noncomment character | ||
3273 : | my ( $s, $ind ) = @_; | ||
3274 : | |||
3275 : | my @clist = (); | ||
3276 : | |||
3277 : | # Skip white space | ||
3278 : | if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) } | ||
3279 : | |||
3280 : | # Loop while it is a comment: | ||
3281 : | while ( substr( $s, $ind, 1 ) eq "[" ) { | ||
3282 : | $ind++; | ||
3283 : | golsen | 1.11 | my $depth = 1; |
3284 : | my $ind2 = $ind; | ||
3285 : | golsen | 1.1 | |
3286 : | # Find end | ||
3287 : | golsen | 1.11 | while ( $depth > 0 ) |
3288 : | { | ||
3289 : | if ( substr( $s, $ind2 ) =~ /^([^][]*\[)/ ) # nested [ ... ] | ||
3290 : | { | ||
3291 : | $ind2 += length( $1 ); # Points at char just past [ | ||
3292 : | $depth++; # If nested comments are allowed | ||
3293 : | } | ||
3294 : | elsif ( substr( $s, $ind2 ) =~ /^([^][]*\])/ ) # close bracket | ||
3295 : | { | ||
3296 : | $ind2 += length( $1 ); # Points at char just past ] | ||
3297 : | $depth--; | ||
3298 : | } | ||
3299 : | else | ||
3300 : | { | ||
3301 : | treeParseError( "comment missing closing bracket '[" | ||
3302 : | . substr( $s, $ind ) . "'" ) | ||
3303 : | } | ||
3304 : | golsen | 1.1 | } |
3305 : | |||
3306 : | golsen | 1.11 | my $comment = substr( $s, $ind, $ind2-$ind-1 ); |
3307 : | golsen | 1.1 | if ( $comment =~ m/\S/ ) { push @clist, $comment } |
3308 : | |||
3309 : | golsen | 1.11 | $ind = $ind2; |
3310 : | golsen | 1.1 | |
3311 : | # Skip white space | ||
3312 : | if ( substr( $s, $ind ) =~ /^(\s+)/ ) { $ind += length( $1 ) } | ||
3313 : | } | ||
3314 : | |||
3315 : | ( $ind, @clist ? \@clist : undef ) | ||
3316 : | } | ||
3317 : | |||
3318 : | |||
3319 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3320 : | # treeParseError( $message ) | ||
3321 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3322 : | sub treeParseError { die "Error: parse_newick_subtree: " . $_[0] . "\n" } | ||
3323 : | |||
3324 : | |||
3325 : | #=============================================================================== | ||
3326 : | # Make a printer plot of a tree: | ||
3327 : | # | ||
3328 : | # $node newick tree root node | ||
3329 : | overbeek | 1.4 | # $file undef (= \*STDOUT), \*STDOUT, \*STDERR, or a file name. |
3330 : | golsen | 1.1 | # $width the approximate characters for the tree without labels |
3331 : | # $min_dx the minimum horizontal branch length | ||
3332 : | # $dy the vertical space per taxon | ||
3333 : | # | ||
3334 : | overbeek | 1.4 | # printer_plot_newick( $node, $file (D=\*STDOUT), $width (D=68), $min_dx (D=2), $dy (D=1) ) |
3335 : | golsen | 1.1 | #=============================================================================== |
3336 : | sub printer_plot_newick { | ||
3337 : | my ( $node, $file, $width, $min_dx, $dy ) = @_; | ||
3338 : | |||
3339 : | overbeek | 1.7 | my ( $fh, $close ) = open_output( $file ); |
3340 : | $fh or return; | ||
3341 : | golsen | 1.1 | |
3342 : | print $fh join( "\n", text_plot_newick( $node, $width, $min_dx, $dy ) ), "\n"; | ||
3343 : | if ( $close ) { close $fh } | ||
3344 : | } | ||
3345 : | |||
3346 : | |||
3347 : | #=============================================================================== | ||
3348 : | # Make a text plot of a tree: | ||
3349 : | # | ||
3350 : | # $node newick tree root node | ||
3351 : | # $width the approximate characters for the tree without labels | ||
3352 : | # $min_dx the minimum horizontal branch length | ||
3353 : | # $dy the vertical space per taxon | ||
3354 : | # | ||
3355 : | # @textlines = text_plot_newick( $node, $width (D=68), $min_dx (D=2), $dy (D=1) ) | ||
3356 : | #=============================================================================== | ||
3357 : | sub text_plot_newick { | ||
3358 : | my ( $node, $width, $min_dx, $dy ) = @_; | ||
3359 : | array_ref( $node ) || die "Bad node passed to text_plot_newick\n"; | ||
3360 : | defined( $min_dx ) and ( $min_dx >= 0 ) or $min_dx = 2; | ||
3361 : | defined( $dy ) and ( $dy >= 1 ) or $dy = 1; | ||
3362 : | defined( $width ) or $width = 68; | ||
3363 : | |||
3364 : | $min_dx = int( $min_dx ); | ||
3365 : | $dy = int( $dy ); | ||
3366 : | golsen | 1.5 | my $x_scale = $width / ( newick_max_X( $node ) || 1 ); # Div by zero caught by RAE |
3367 : | golsen | 1.1 | |
3368 : | my $hash = {}; | ||
3369 : | layout_printer_plot( $node, $hash, 0, -0.5 * $dy, $x_scale, $min_dx, $dy ); | ||
3370 : | |||
3371 : | # dump_tree_hash( $node, $hash ); exit; | ||
3372 : | |||
3373 : | # Generate the lines of the tree one by one: | ||
3374 : | |||
3375 : | my ( $y1, $y2 ) = @{ $hash->{ $node } }; | ||
3376 : | map { text_tree_row( $node, $hash, $_, "", "+" ) } ( $y1 .. $y2 ); | ||
3377 : | } | ||
3378 : | |||
3379 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3380 : | # ( $xmax, $ymax, $root_y ) = layout_printer_plot( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy ) | ||
3381 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3382 : | sub layout_printer_plot { | ||
3383 : | my ( $node, $hash, $x0, $y0, $x_scale, $min_dx, $dy ) = @_; | ||
3384 : | array_ref( $node ) || die "Bad node ref passed to layout_printer_plot\n"; | ||
3385 : | hash_ref( $hash ) || die "Bad hash ref passed to layout_printer_plot\n"; | ||
3386 : | |||
3387 : | my $dx = newick_x( $node ); | ||
3388 : | if ( defined( $dx ) ) { | ||
3389 : | $dx *= $x_scale; | ||
3390 : | $dx >= $min_dx or $dx = $min_dx; | ||
3391 : | } | ||
3392 : | else { | ||
3393 : | $dx = ( $x0 > 0 ) ? $min_dx : 0; | ||
3394 : | } | ||
3395 : | $dx = int( $dx + 0.4999 ); | ||
3396 : | |||
3397 : | my ( $x, $xmax, $y, $ymax, $y1, $y2, $yn1, $yn2 ); | ||
3398 : | |||
3399 : | $x = $x0 + $dx; | ||
3400 : | $y1 = int( $y0 + 0.5 * $dy + 0.4999 ); | ||
3401 : | my @dl = newick_desc_list( $node ); | ||
3402 : | |||
3403 : | if ( ! @dl ) { # A tip | ||
3404 : | $xmax = $x; | ||
3405 : | $y = $yn1 = $yn2 = $y2 = $y1; | ||
3406 : | $ymax = $y + 0.5 * $dy; | ||
3407 : | } | ||
3408 : | |||
3409 : | else { # A subtree | ||
3410 : | $xmax = -1; | ||
3411 : | my $xmaxi; | ||
3412 : | my $yi; | ||
3413 : | my @ylist = (); | ||
3414 : | $ymax = $y0; | ||
3415 : | |||
3416 : | foreach ( @dl ) { | ||
3417 : | ( $xmaxi, $ymax, $yi ) = layout_printer_plot( $_, $hash, $x, $ymax, $x_scale, $min_dx, $dy ); | ||
3418 : | push @ylist, $yi; | ||
3419 : | if ( $xmaxi > $xmax ) { $xmax = $xmaxi } | ||
3420 : | } | ||
3421 : | |||
3422 : | # Use of y-list is overkill for saving first and last values, | ||
3423 : | # but eases implimentation of alternative y-value calculations. | ||
3424 : | |||
3425 : | $yn1 = $ylist[ 0]; | ||
3426 : | $yn2 = $ylist[-1]; | ||
3427 : | $y = int( 0.5 * ( $yn1 + $yn2 ) + 0.4999 ); | ||
3428 : | } | ||
3429 : | |||
3430 : | $y2 = int( $ymax - 0.5 * $dy + 0.4999 ); | ||
3431 : | |||
3432 : | $hash->{ $node } = [ $y1, $y2, $x0, $x, $y, $yn1, $yn2 ]; | ||
3433 : | ( $xmax, $ymax, $y ); | ||
3434 : | } | ||
3435 : | |||
3436 : | |||
3437 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3438 : | # Debug routine | ||
3439 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3440 : | sub dump_tree { | ||
3441 : | my ( $node, $prefix ) = @_; | ||
3442 : | defined( $prefix ) or $prefix = ""; | ||
3443 : | print STDERR $prefix, join(", ", @$node), "\n"; | ||
3444 : | my @dl = $node->[0] ? @{$node->[0]} : (); | ||
3445 : | foreach ( @dl ) { dump_tree( $_, $prefix . " " ) } | ||
3446 : | $prefix or print STDERR "\n"; | ||
3447 : | } | ||
3448 : | |||
3449 : | |||
3450 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3451 : | # Debug routine | ||
3452 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3453 : | sub dump_tree_hash { | ||
3454 : | my ( $node, $hash, $prefix ) = @_; | ||
3455 : | defined( $prefix ) or print STDERR "node; [ y1, y2, x0, x, y, yn1, yn2 ]\n" and $prefix = ""; | ||
3456 : | print STDERR $prefix, join(", ", @$node), "; ", join(", ", @{ $hash->{ $node } } ), "\n"; | ||
3457 : | my @dl = $node->[0] ? @{$node->[0]} : (); | ||
3458 : | foreach (@dl) { dump_tree_hash( $_, $hash, $prefix . " " ) } | ||
3459 : | } | ||
3460 : | |||
3461 : | |||
3462 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3463 : | # $line = text_tree_row( $node, $hash, $row, $line, $symb ) | ||
3464 : | #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3465 : | sub text_tree_row { | ||
3466 : | my ( $node, $hash, $row, $line, $symb ) = @_; | ||
3467 : | |||
3468 : | my ( $y1, $y2, $x0, $x, $y, $yn1, $yn2 ) = @{ $hash->{ $node } }; | ||
3469 : | if ( $row < $y1 || $row > $y2 ) { return $line } | ||
3470 : | |||
3471 : | if ( length( $line ) < $x0 ) { $line .= " " x ( $x0 - length( $line ) ) } | ||
3472 : | |||
3473 : | if ( $row == $y ) { | ||
3474 : | $line = substr( $line, 0, $x0 ) . $symb . (( $x > $x0 ) ? "-" x ($x - $x0) : ""); | ||
3475 : | } | ||
3476 : | |||
3477 : | elsif ( $row > $yn1 && $row < $yn2 ) { | ||
3478 : | if ( length( $line ) < $x ) { $line .= " " x ( $x - length( $line ) ) . "|" } | ||
3479 : | else { substr( $line, $x ) = "|" } | ||
3480 : | } | ||
3481 : | |||
3482 : | my @dl = newick_desc_list( $node ); | ||
3483 : | |||
3484 : | if ( @dl < 1 ) { | ||
3485 : | $line .= " " . $node->[1]; | ||
3486 : | } | ||
3487 : | |||
3488 : | else { | ||
3489 : | my @list = map { [ $_, "+" ] } @dl; # Print symbol for line | ||
3490 : | $list[ 0]->[1] = "/"; | ||
3491 : | $list[-1]->[1] = "\\"; | ||
3492 : | |||
3493 : | foreach ( @list ) { | ||
3494 : | my ( $n, $s ) = @$_; | ||
3495 : | if ( $row >= $hash->{ $n }->[0] && $row <= $hash->{ $n }->[1] ) { | ||
3496 : | $line = text_tree_row( $n, $hash, $row, $line, $s ); | ||
3497 : | } | ||
3498 : | } | ||
3499 : | |||
3500 : | if ( $row == $y ) { substr( $line, $x, 1 ) = "+" } | ||
3501 : | } | ||
3502 : | |||
3503 : | return $line; | ||
3504 : | } | ||
3505 : | |||
3506 : | |||
3507 : | overbeek | 1.7 | #=============================================================================== |
3508 : | # Open an input file stream: | ||
3509 : | # | ||
3510 : | # ( $handle, undef ) = open_input( ); # \*STDIN | ||
3511 : | # ( $handle, undef ) = open_input( \*FH ); | ||
3512 : | # ( $handle, 1 ) = open_input( $file ); # need to close $handle | ||
3513 : | # | ||
3514 : | #=============================================================================== | ||
3515 : | sub open_input | ||
3516 : | { | ||
3517 : | my $file = shift; | ||
3518 : | my $fh; | ||
3519 : | if ( ! defined( $file ) ) { return ( \*STDIN ) } | ||
3520 : | elsif ( ref( $file ) eq 'GLOB' ) { return ( $file ) } | ||
3521 : | elsif ( open( $fh, "<$file" ) ) { return ( $fh, 1 ) } # Need to close | ||
3522 : | |||
3523 : | print STDERR "gjonewick::open_input could not open '$file' for reading\n"; | ||
3524 : | return undef; | ||
3525 : | } | ||
3526 : | |||
3527 : | |||
3528 : | #=============================================================================== | ||
3529 : | # Open an output file stream: | ||
3530 : | # | ||
3531 : | # ( $handle, undef ) = open_output( ); # \*STDOUT | ||
3532 : | # ( $handle, undef ) = open_output( \*FH ); | ||
3533 : | # ( $handle, 1 ) = open_output( $file ); # need to close $handle | ||
3534 : | # | ||
3535 : | #=============================================================================== | ||
3536 : | sub open_output | ||
3537 : | { | ||
3538 : | my $file = shift; | ||
3539 : | my $fh; | ||
3540 : | if ( ! defined( $file ) ) { return ( \*STDOUT ) } | ||
3541 : | elsif ( ref( $file ) eq 'GLOB' ) { return ( $file ) } | ||
3542 : | elsif ( ( open $fh, ">$file" ) ) { return ( $fh, 1 ) } # Need to close | ||
3543 : | |||
3544 : | print STDERR "gjonewick::open_output could not open '$file' for writing\n"; | ||
3545 : | return undef; | ||
3546 : | } | ||
3547 : | |||
3548 : | golsen | 1.9 | |
3549 : | #=============================================================================== | ||
3550 : | # Some subroutines copied from gjolists | ||
3551 : | #=============================================================================== | ||
3552 : | # Return the common prefix of two lists: | ||
3553 : | # | ||
3554 : | # @common = common_prefix( \@list1, \@list2 ) | ||
3555 : | #----------------------------------------------------------------------------- | ||
3556 : | sub common_prefix | ||
3557 : | { | ||
3558 : | my ($l1, $l2) = @_; | ||
3559 : | ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n"; | ||
3560 : | ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n"; | ||
3561 : | |||
3562 : | my $i = 0; | ||
3563 : | my $l1_i; | ||
3564 : | while ( defined( $l1_i = $l1->[$i] ) && $l1_i eq $l2->[$i] ) { $i++ } | ||
3565 : | |||
3566 : | return @$l1[ 0 .. ($i-1) ]; # perl handles negative range | ||
3567 : | } | ||
3568 : | |||
3569 : | |||
3570 : | #----------------------------------------------------------------------------- | ||
3571 : | # Return the unique suffixes of each of two lists: | ||
3572 : | # | ||
3573 : | # ( \@suffix1, \@suffix2 ) = unique_suffixes( \@list1, \@list2 ) | ||
3574 : | #----------------------------------------------------------------------------- | ||
3575 : | sub unique_suffixes | ||
3576 : | { | ||
3577 : | my ($l1, $l2) = @_; | ||
3578 : | ref($l1) eq "ARRAY" || die "common_prefix: arg 1 is not an array ref\n"; | ||
3579 : | ref($l2) eq "ARRAY" || die "common_prefix: arg 2 is not an array ref\n"; | ||
3580 : | |||
3581 : | my $i = 0; | ||
3582 : | my @l1 = @$l1; | ||
3583 : | my @l2 = @$l2; | ||
3584 : | my $l1_i; | ||
3585 : | while ( defined( $l1_i = $l1[$i] ) && $l1_i eq $l2[$i] ) { $i++ } | ||
3586 : | |||
3587 : | splice @l1, 0, $i; | ||
3588 : | splice @l2, 0, $i; | ||
3589 : | return ( \@l1, \@l2 ); | ||
3590 : | } | ||
3591 : | |||
3592 : | |||
3593 : | #------------------------------------------------------------------------------- | ||
3594 : | # List of values duplicated in a list (stable in order by second occurance): | ||
3595 : | # | ||
3596 : | # @dups = duplicates( @list ) | ||
3597 : | #------------------------------------------------------------------------------- | ||
3598 : | sub duplicates | ||
3599 : | { | ||
3600 : | my %cnt = (); | ||
3601 : | grep { ++$cnt{$_} == 2 } @_; | ||
3602 : | } | ||
3603 : | |||
3604 : | |||
3605 : | #------------------------------------------------------------------------------- | ||
3606 : | # Randomize the order of a list: | ||
3607 : | # | ||
3608 : | # @random = random_order( @list ) | ||
3609 : | #------------------------------------------------------------------------------- | ||
3610 : | sub random_order | ||
3611 : | { | ||
3612 : | my ( $i, $j ); | ||
3613 : | for ( $i = @_ - 1; $i > 0; $i-- ) | ||
3614 : | { | ||
3615 : | $j = int( ($i+1) * rand() ); | ||
3616 : | ( $_[$i], $_[$j] ) = ( $_[$j], $_[$i] ); # Interchange i and j | ||
3617 : | } | ||
3618 : | |||
3619 : | @_; | ||
3620 : | } | ||
3621 : | |||
3622 : | |||
3623 : | #----------------------------------------------------------------------------- | ||
3624 : | # Intersection of two or more sets: | ||
3625 : | # | ||
3626 : | # @intersection = intersection( \@set1, \@set2, ... ) | ||
3627 : | #----------------------------------------------------------------------------- | ||
3628 : | sub intersection | ||
3629 : | { | ||
3630 : | my $set = shift; | ||
3631 : | my @intersection = @$set; | ||
3632 : | |||
3633 : | foreach $set ( @_ ) | ||
3634 : | { | ||
3635 : | my %set = map { $_ => 1 } @$set; | ||
3636 : | @intersection = grep { exists $set{ $_ } } @intersection; | ||
3637 : | } | ||
3638 : | |||
3639 : | @intersection; | ||
3640 : | } | ||
3641 : | |||
3642 : | |||
3643 : | #----------------------------------------------------------------------------- | ||
3644 : | # Elements in set 1, but not set 2: | ||
3645 : | # | ||
3646 : | # @difference = set_difference( \@set1, \@set2 ) | ||
3647 : | #----------------------------------------------------------------------------- | ||
3648 : | sub set_difference | ||
3649 : | { | ||
3650 : | my ($set1, $set2) = @_; | ||
3651 : | my %set2 = map { $_ => 1 } @$set2; | ||
3652 : | grep { ! ( exists $set2{$_} ) } @$set1; | ||
3653 : | } | ||
3654 : | |||
3655 : | |||
3656 : | golsen | 1.1 | 1; |
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |