[Bio] / FigKernelPackages / InsertIntoTree.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/InsertIntoTree.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.15 - (view) (download) (as text)

1 : overbeek 1.1 #
2 : golsen 1.13 # Copyright (c) 2003-2007 University of Chicago and Fellowship
3 : overbeek 1.1 # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 :     package InsertIntoTree;
19 : golsen 1.11
20 : golsen 1.13 #===============================================================================
21 : golsen 1.14 # A package of functions for adding to a tree:
22 : golsen 1.13 #
23 : golsen 1.14 # ( $tree, $likelihood ) = add_seq_to_tree( \%options );
24 :     # ( $tree, $likelihood ) = add_seq_to_tree( %options );
25 :     # $tree = add_seq_to_tree( \%options );
26 :     # $tree = add_seq_to_tree( %options );
27 :     #
28 :     # or with option all => 1:
29 :     #
30 :     # @tree_likelihood_pairs = add_seq_to_tree( \%options );
31 :     # @tree_likelihood_pairs = add_seq_to_tree( %options );
32 :     #
33 :     #
34 :     # $newtree = add_seq_to_big_tree( \%options )
35 : golsen 1.13 #
36 :     #===============================================================================
37 :    
38 : overbeek 1.1 use strict;
39 : golsen 1.11 use gjonewicklib;
40 : golsen 1.14 use gjophylip;
41 :    
42 :     # use Carp;
43 :     # use Data::Dumper;
44 : overbeek 1.1
45 : golsen 1.13 #===============================================================================
46 :     # Function that tests all possible branches for the insertion of a new
47 :     # sequence into an existing tree.
48 :     #
49 : golsen 1.14 # ( $tree, $likelihood ) = add_seq_to_tree( \%options );
50 :     # ( $tree, $likelihood ) = add_seq_to_tree( %options );
51 :     # $tree = add_seq_to_tree( \%options );
52 :     # $tree = add_seq_to_tree( %options );
53 :     #
54 :     # or when all => 1 is included among the options
55 :     #
56 :     # @tree_likelihood_pairs = add_seq_to_tree( \%options );
57 :     # @tree_likelihood_pairs = add_seq_to_tree( %options );
58 :     #
59 : golsen 1.13 #
60 :     # Required "options":
61 : golsen 1.14 # -------------------------------------------------------------------------
62 :     # align => $align = [ [id,def,seq ], ...] or [ [id,seq ], ...]
63 :     # id => $id id of sequence to add to tree
64 :     # tree => $tree overbeek or gjo starting tree
65 :     # -------------------------------------------------------------------------
66 :     #
67 : golsen 1.13 #
68 : golsen 1.14 # Other options:
69 :     # -------------------------------------------------------------------------
70 :     # all => 1 Return list of all [ $tree, $likelihood ] pairs
71 :     # tree => $tree overbeek or gjo starting tree
72 :     # -------------------------------------------------------------------------
73 :     #
74 :     #
75 :     # Also accepts mamy gjophylip::proml options, including:
76 :     # -------------------------------------------------------------------------
77 :     # alpha, categories, hmm, model, program, tmp, tmp_dir, weights
78 :     # -------------------------------------------------------------------------
79 : golsen 1.13 #
80 :     #===============================================================================
81 : golsen 1.14 sub add_seq_to_tree
82 : golsen 1.11 {
83 : golsen 1.13 my %args = ( ref( $_[0] ) eq 'HASH' ) ? %{$_[0]} : @_;
84 : overbeek 1.1
85 : golsen 1.14 my $align = $args{ align };
86 :     my $tree = $args{ tree };
87 :     my $id = $args{ id };
88 :    
89 :     my $type = 'gjo';
90 :     if ( gjonewicklib::is_overbeek_tree( $tree ) )
91 : overbeek 1.12 {
92 : golsen 1.14 $type = 'overbeek';
93 :     $tree = gjonewicklib::overbeek_to_gjonewick( $tree )
94 : overbeek 1.12 }
95 : golsen 1.14 $tree = &gjonewicklib::uproot_newick( $tree );
96 :     &gjonewicklib::newick_set_all_branches( $tree, 0.1 ); # In case of undefs
97 : overbeek 1.12
98 : golsen 1.14 my $tip_node = [ [], $id, 0.1 ];
99 :     my $trees = &add_tip_to_each_branch( $tree, $tip_node );
100 : overbeek 1.1
101 : golsen 1.14 my %options = ( alignment => $align,
102 :     tree_format => 'gjo',
103 :     user_trees => $trees,
104 :     &tree_options( \%args )
105 : golsen 1.11 );
106 : overbeek 1.1
107 : golsen 1.14 my @tree_and_likelihood = sort { $b->[1] <=> $a->[1] }
108 :     gjophylip::proml( \%options );
109 : golsen 1.13
110 :     # Return them all?
111 :    
112 : golsen 1.14 if ( $options{all} )
113 : golsen 1.13 {
114 : golsen 1.14 if ( $type eq 'overbeek' )
115 : golsen 1.13 {
116 : golsen 1.14 foreach ( @tree_and_likelihood )
117 :     {
118 :     $_->[0] = gjonewicklib::gjonewick_to_overbeek( $_->[0] )
119 :     }
120 : golsen 1.13 }
121 :     return wantarray ? @tree_and_likelihood
122 :     : \@tree_and_likelihood
123 :     }
124 :    
125 :     my ( $newtree, $lnlik ) = @{ $tree_and_likelihood[0] };
126 : overbeek 1.12 if ($type eq "overbeek")
127 :     {
128 : golsen 1.14 $newtree = &gjonewicklib::gjonewick_to_overbeek( $newtree );
129 : overbeek 1.12 }
130 : golsen 1.14
131 : golsen 1.11 return wantarray ? ( $newtree, $lnlik ) : $newtree;
132 : overbeek 1.1 }
133 :    
134 :    
135 : golsen 1.14 #-------------------------------------------------------------------------------
136 :     # Add the tip along all branches
137 :     #-------------------------------------------------------------------------------
138 :     sub add_tip_to_each_branch
139 : golsen 1.11 {
140 : golsen 1.14 my ( $node, $tip, $root, $trees ) = @_;
141 :     $root ||= $node; # If no root, it is this node
142 :     $trees ||= []; # If no incoming list, make an empty one
143 :    
144 :     my $ndesc = $node->[0] ? @{$node->[0]} : 0;
145 :     for ( my $i = 0; $i < $ndesc; $i++ )
146 :     {
147 :     my $desc = $node->[0]->[$i];
148 :     &add_tip_to_each_branch( $desc ,$tip, $root, $trees );
149 :     my $new_node = [ [ $desc, $tip ], undef, 0.1 ];
150 :     $node->[0]->[$i] = $new_node; # Modify tree
151 :     push @$trees, &gjonewicklib::copy_newick_tree( $root ); # Save tree
152 :     $node->[0]->[$i] = $desc; # Restore tree
153 :     }
154 : overbeek 1.5
155 : golsen 1.14 return $trees;
156 : overbeek 1.1 }
157 :    
158 :    
159 : golsen 1.14 #===============================================================================
160 :     # add_seq_to_big_tree -- Subroutine superset of the functions of the scripts:
161 :     #
162 :     # bring_tree_up_to_ali [-c CheckpointFile] [-a Alpha] [-n NeighSz] [-t TmpDir] Ali Tree
163 :     # insert_prot_into_tree [-a alpha] [-n NeighSz] Ali Tree Id NJtree [Weights]
164 :     #
165 :     # $newtree = add_seq_to_big_tree( \%options )
166 :     #
167 :     #
168 :     # Required "options"
169 :     # -------------------------------------------------------------------------
170 :     # align => \@align # [id,seq] or [id,def,seq]
171 :     # tree => $tree # starting tree for addition(s)
172 :     # -------------------------------------------------------------------------
173 :     # $tree can be in overbeek or gjonewick format. The returned tree format
174 :     # matches that supplied.
175 :     #
176 :     #
177 :     # Optional "options"
178 :     # -------------------------------------------------------------------------
179 :     # checkpoint => $checkpointfile # D = none
180 :     # id => $id_to_add # D = all in align but not tree
181 :     # ids => \@ids_to_add # D = all in align but not tree
182 :     # neighborhood => $n_representatives # D = 40
183 :     # rough_tree => $rough_tree # D = protdist_neighbor tree
184 :     # tip_priority => \&tip_priority( $distance_to_tip, $seq_length )
185 :     # # D = 7 * log( $seq_length )
186 :     # # - log( $tip_distance )
187 :     # -------------------------------------------------------------------------
188 :     # $id_to_add defines a sequence in the alignment to be added to the tree.
189 :     # @ids_to_add is an ordered list of ids to add to the tree.
190 :     # If neither of the above is supplied, all sequences in the alignment but
191 :     # not in the tree are added, from longest to shortest.
192 :     # &tip_priority is a two argument function that assigns priorities for
193 :     # tip selection in representative subtrees. The first paramter is
194 :     # the distance from the focus to the tip in the current tree. The
195 :     # second parameter is the number of informative sites in the sequence.
196 :     #
197 :     #
198 :     # Options passed to proml and/or protdist_neighbor
199 :     # -------------------------------------------------------------------------
200 :     # alpha => $alpha # D = inf
201 :     # categories => [ \@category_rates, site_categories ]
202 :     # coef_of_var => 1/sqrt(alpha) # D = 0
203 :     # gamma_bins => $n_bins # D = 5
204 :     # invar_frac => $fraction # D = 0
205 :     # model
206 :     # neighbor => $path_to_neighbor
207 :     # persistance => $rate_correl_range # D = 0
208 :     # proml => $path_to_proml
209 :     # protdist => $path_to_protdist
210 :     # rate_hmm => [ [rate,fraction], ... ]
211 :     # tmp => $place_for_tmp_dir
212 :     # tmp_dir => $place_for_temp_files
213 :     # weights => $site_weights
214 :     # -------------------------------------------------------------------------
215 :     # If $tmp_dir is specified and it exists, niether the temporary files nor
216 :     # the directory are deleted.
217 :     #
218 :     #===============================================================================
219 :    
220 :     sub add_seq_to_big_tree
221 :     {
222 :     my %args = ref( $_[0] ) eq 'HASH' ? %{$_[0]}
223 :     : ref( $_[0] ) eq 'ARRAY' ? @{$_[0]}
224 :     : @_;
225 :     my $align = $args{ align } || $args{ alignment };
226 :     my $checkpoint = $args{ checkpoint };
227 :     my $id_to_ins = $args{ id } || $args{ ids };
228 :     my $rough_tree = $args{ rough_tree } || $args{ nj_tree };
229 :     my $size_rep = $args{ neighborhood } || 40;
230 :     my $tip_priority = $args{ tip_priority };
231 :     my $tree0 = $args{ tree };
232 :    
233 :     ( $align && ( ref( $align ) eq 'ARRAY' ) )
234 :     or print STDERR "add_seq_to_big_tree called without valid alignment"
235 :     and return undef;
236 :    
237 :     ( $tree0 && ( ref( $tree0 ) eq 'ARRAY' ) )
238 :     or print STDERR "add_seq_to_big_tree called without valid tree"
239 :     and return undef;
240 :    
241 :     if ( $tip_priority )
242 :     {
243 :     if ( ref( $tip_priority ) ne 'CODE' )
244 :     {
245 :     print STDERR "add_seq_to_big_tree:\n";
246 :     print STDERR " tip_priority option is not a function reference.\n";
247 :     print STDERR " Tree is unchanged.\n";
248 :     return $tree0;
249 :     }
250 :     }
251 :     else # Default measure of tip_priority for representative trees.
252 :     { #
253 :     # Balance sequence length and branch length such that a 50% decrease
254 :     # in branch length is required to permit a 10% decrease in sequence
255 :     # length.
256 :     #
257 :     # Called as: &$tip_priority( $distance_to_tip, $informative_sites )
258 :    
259 :     $tip_priority = sub { 7 * log( $_[1]+1 ) - log( $_[0]+0.0001 ) };
260 :     }
261 :    
262 :     # Build an index to the alignment and compute informative positions:
263 :    
264 :     my %in_align = map { $_->[0] => $_ } @$align;
265 :     my %inform = map { $_->[0] => &informative_sites( $_->[-1] ) } @$align;
266 :    
267 :     # Convert tree format if necessary
268 :    
269 :     my $format = gjonewicklib::is_overbeek_tree( $tree0 ) ? 'overbeek' : 'gjo';
270 :     my $tree = ( $format eq 'overbeek' ) ? gjonewicklib::overbeek_to_gjonewick( $tree0 )
271 :     : gjonewicklib::copy_newick_tree( $tree0 );
272 :    
273 :     my $tips = &gjonewicklib::newick_tip_list( $tree );
274 :     my %in_tree = map { $_ => 1 } @$tips;
275 :    
276 :     # Check for sequences in the tree that are not in the alignment:
277 :    
278 :     my @extra_tips = sort grep { ! $in_align{ $_ } } @$tips;
279 :     if ( @extra_tips )
280 :     {
281 :     print STDERR "add_seq_to_big_tree:\n";
282 :     print STDERR " Sequence(s) found in tree that are not in alignment:\n";
283 :     print STDERR " '", join( "', '", @extra_tips ), "'\n";
284 :     print STDERR " Tree is unchanged.\n";
285 :     return $tree0;
286 :     }
287 :    
288 :     my @ids_to_ins;
289 :     if ( $id_to_ins ) # User-supplied list. Order is respected.
290 :     {
291 :     @ids_to_ins = ref( $id_to_ins ) eq 'ARRAY' ? @$id_to_ins : ( $id_to_ins );
292 :    
293 :     my @orphan_ids = grep { ! $in_align{ $_ } } @ids_to_ins;
294 :     if ( @orphan_ids )
295 :     {
296 :     print STDERR "add_seq_to_big_tree:\n";
297 :     print STDERR " Sequence(s) to add that are not in alignment:\n";
298 :     print STDERR " '", join( "', '", @orphan_ids ), "'\n";
299 :     print STDERR " Tree is unchanged.\n";
300 :     return $tree0;
301 :     }
302 :    
303 :     my @already_in = grep { $in_tree{ $_ } } @ids_to_ins;
304 :     @ids_to_ins = grep { ! $in_tree{ $_ } } @ids_to_ins;
305 :     if ( @already_in )
306 :     {
307 :     print STDERR "add_seq_to_big_tree:\n";
308 :     print STDERR " Sequence(s) to add that are already in tree:\n";
309 :     print STDERR " '", join( "', '", @orphan_ids ), "'\n";
310 :     print STDERR " Attempting to continue.\n" if @ids_to_ins;
311 :     }
312 :    
313 :     if ( ! @ids_to_ins )
314 :     {
315 :     print STDERR "add_seq_to_big_tree:\n" if ! @already_in;
316 :     print STDERR " No sequences to add. Tree is unchanged.\n";
317 :     return $tree0;
318 :     }
319 :     }
320 :     else # Add all ids in alignment, but not tree, from longest to shortest.
321 :     {
322 :     @ids_to_ins = sort { $inform{ $b } <=> $inform{ $a } }
323 :     grep { ! $in_tree{ $_ } }
324 :     keys %in_align;
325 :     if ( ! @ids_to_ins )
326 :     {
327 :     print STDERR "add_seq_to_big_tree:\n";
328 :     print STDERR " All sequences in the alignment are in the tree. Tree is unchanged.\n";
329 :     return $tree0;
330 :     }
331 :     }
332 :    
333 :     # Does the guide tree exit? If so, check it:
334 :    
335 :     my $rough_tree0 = $rough_tree;
336 :     my @rough_tips;
337 : overbeek 1.1
338 : golsen 1.14 if ( $rough_tree )
339 :     {
340 :     if ( gjonewicklib::is_overbeek_tree( $rough_tree ) )
341 :     {
342 :     $rough_tree = gjonewicklib::overbeek_to_gjonewick( $rough_tree )
343 :     }
344 : overbeek 1.1
345 : golsen 1.14 # Check the tip content:
346 : overbeek 1.1
347 : golsen 1.14 @rough_tips = gjonewicklib::newick_tip_list( $rough_tree );
348 :     my %rough_tips = map { $_ => 1 } @rough_tips;
349 :    
350 :     # Missing tips?
351 :     # We do not require that all of the sequences in $tree be in the
352 :     # rough_tree. But there will almost certainly be a need for a
353 :     # significant number of sequences beyond those to be added. The
354 :     # user is responsible for a resonsonable representation in the
355 :     # rough_tree, if it is supplied.
356 :    
357 :     my @missing = grep { ! $rough_tips{ $_ } } @ids_to_ins;
358 :     if ( @missing )
359 :     {
360 :     print STDERR "add_seq_to_big_tree:\n";
361 :     print STDERR " The supplied guide tree is missing the following ids:\n";
362 :     print STDERR " '", join( "', '", @missing ), "'\n";
363 :     print STDERR " Rebuilding using gjophylip::protdist_neighbor.\n";
364 :     $rough_tree = undef;
365 :     }
366 :     }
367 :    
368 :     # If there is a problem with the current guide tree, make one:
369 :    
370 :     if ( ! $rough_tree )
371 : overbeek 1.1 {
372 : golsen 1.14 my %nj_opts = ( alignment => [ map { $in_align{ $_ } } @$tips, @ids_to_ins ],
373 :     tree_format => 'gjo',
374 :     &tree_options( \%args )
375 :     );
376 :     $rough_tree = gjophylip::protdist_neighbor( \%nj_opts );
377 :     if ( ! $rough_tree )
378 :     {
379 :     print STDERR "add_seq_to_big_tree:\n";
380 :     print STDERR " Failed to create protdist_neighbor tree\n";
381 :     print STDERR " Tree is unchanged.\n";
382 :     return $tree0;
383 :     }
384 :    
385 :     @rough_tips = ( @$tips, @ids_to_ins );
386 : overbeek 1.1 }
387 : golsen 1.11
388 : golsen 1.14 # Set up ML options, only the id_to_ins, subalignment and subtree change:
389 :    
390 :     my %options = ( tree_format => 'gjo',
391 :     &tree_options( \%args )
392 :     );
393 :    
394 :     my ( $current_approx, $node1, $x1, $node2, $x2, $x, $fraction,
395 :     $tree2, $subtree, @st_tips
396 :     );
397 :    
398 :     # Add sequences one at a time:
399 :     #
400 :     # $tree - The current tree to which a new tip is being added.
401 :     #
402 :     # $rough_tree - An approximate tree with new tips added.
403 :     #
404 :     # $current_approx - A tree with current approximation of the insertion
405 :     # point. Initially this is drawn from $rough_tree,
406 :     # but later it is based on insertion into a
407 :     # neighborhood of $tree.
408 :     #
409 :     # $subtree - The extracted neighborhood of $tree to which a tip
410 :     # is to be added.
411 :    
412 :     foreach $id_to_ins ( @ids_to_ins )
413 :     {
414 : overbeek 1.15 print STDERR "inserting $id_to_ins\n" if $ENV{'VERBOSE'};
415 :    
416 : golsen 1.14 $options{ id } = $id_to_ins;
417 :    
418 :     # Use rough_tree for initial current_approx, removing extra tips:
419 :    
420 :     $current_approx = gjonewicklib::copy_newick_tree( $rough_tree );
421 :     if ( grep { ! $in_tree{ $_ } && ( $_ ne $id_to_ins ) } @rough_tips )
422 :     {
423 :     $current_approx = gjonewicklib::newick_subtree( $current_approx, $id_to_ins, @$tips );
424 :     if ( ! gjonewicklib::newick_is_unrooted( $current_approx ) )
425 :     {
426 :     $current_approx = gjonewicklib::uproot_newick( $current_approx );
427 :     }
428 :     }
429 :    
430 :     # Pull out neighborhoods, insert and interate until it repeats
431 :     # the same neighborhood.
432 :    
433 :     my %seen = ();
434 :     while ( 1 )
435 :     {
436 : overbeek 1.15 print STDERR " looking for insertion point\n" if $ENV{'VERBOSE'};
437 :    
438 : golsen 1.14 ( $node1, $x1, $node2, $x2, $x ) =
439 :     gjonewicklib::newick_tip_insertion_point( $current_approx, $id_to_ins );
440 : overbeek 1.15 print STDERR " got it\n" if $ENV{'VERBOSE'};
441 : golsen 1.14
442 :     # Project tip insertion point onto $tree and reroot at that location:
443 :    
444 :     $x1 = 0 if $x1 < 0;
445 :     $x2 = 0 if $x2 < 0;
446 :     $fraction = ( $x1 + $x2 > 0 ) ? $x1 / ( $x1 + $x2 ) : 0.5;
447 :     $subtree = gjonewicklib::copy_newick_tree( $tree );
448 :     $subtree = gjonewicklib::reroot_newick_between_nodes( $subtree, $node1, $node2, $fraction );
449 : overbeek 1.15 print STDERR " got subtree\n" if $ENV{'VERBOSE'};
450 : golsen 1.14
451 :     # For this rooting, prioritize tips for representing their group:
452 :    
453 :     my %tip_dist = gjonewicklib::newick_tip_distances( $subtree );
454 :     my %tip_priority = map{ $_ => &$tip_priority( $tip_dist{ $_ }, $inform{ $_ } ) } @$tips;
455 : overbeek 1.15 print STDERR " got tip_priority\n" if $ENV{'VERBOSE'};
456 : golsen 1.14
457 :     # Extract the subtree:
458 :    
459 :     $subtree = gjonewicklib::root_neighborhood_representative_tree( $subtree, $size_rep, \%tip_priority );
460 :     @st_tips = gjonewicklib::newick_tip_list( $subtree );
461 :    
462 :     # Break if we have already pulled this subset of tips:
463 :    
464 :     my $tip_string = join( "\t", sort @st_tips );
465 :     last if $seen{ $tip_string }++;
466 :    
467 :     print STDERR "$tip_string\n" if $ENV{'VERBOSE'};
468 :    
469 :     if ( ! gjonewicklib::newick_is_unrooted( $subtree ) )
470 :     {
471 :     $subtree = gjonewicklib::uproot_newick( $subtree );
472 :     }
473 :     gjonewicklib::printer_plot_newick( $subtree, \*STDOUT, undef, 1, 1 ) if $ENV{'VERBOSE'};
474 :    
475 :     # Do the ML insertion:
476 :    
477 :     $options{ align } = [ map { $in_align{ $_ } } @st_tips, $id_to_ins ];
478 :     $options{ tree } = $subtree;
479 : overbeek 1.15 print STDERR "starting add_seq_to_tree\n" if $ENV{'VERBOSE'};
480 : golsen 1.14 $current_approx = &add_seq_to_tree( \%options );
481 : overbeek 1.15 print STDERR "back from add_seq_to_tree\n" if $ENV{'VERBOSE'};
482 : golsen 1.14 }
483 :    
484 : overbeek 1.15 print STDERR "got position\n" if $ENV{'VERBOSE'};
485 :    
486 : golsen 1.14 # Project this insertion point onto the full starting tree and insert
487 :     # (the location was computed above, for finding the neighborhood):
488 :    
489 :     my $newtip = [ undef, $id_to_ins, $x ];
490 :     $tree = newick_insert_between_nodes( $tree, $newtip, $node1, $node2, $fraction );
491 :    
492 :     push @$tips, $id_to_ins;
493 :     $in_tree{ $id_to_ins } = 1;
494 :    
495 :     if ( $checkpoint && open( CHECKPOINT, ">>$checkpoint" ) )
496 :     {
497 :     writeNewickTree( $tree, \*CHECKPOINT );
498 :     close( CHECKPOINT );
499 :     }
500 :     }
501 : overbeek 1.15 print STDERR "exitng\n" if $ENV{'VERBOSE'};
502 : golsen 1.14 ( $format eq 'overbeek' ) ? gjonewicklib::gjonewick_to_overbeek( $tree )
503 :     : $tree;
504 :     }
505 :    
506 :    
507 :     #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
508 :     # tree_options -- filter tree options out of %args:
509 :     #
510 :     # ( key => value, key => value ... ) = tree_options( \%args )
511 :     # ( key => value, key => value ... ) = tree_options( %args )
512 :     #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
513 :     sub tree_options
514 :     {
515 :     my %args = ( ref( $_[0] ) eq 'HASH' ) ? %{$_[0]} : @_;
516 :     my %tree_opt = map { canonical_key($_) => 1 }
517 :     qw( alpha categories coef_of_var
518 :     gamma_bins invar_frac model
519 :     neighbor persistance proml
520 :     protdist rate_hmm tmp
521 :     tmp_dir weights
522 :     );
523 :    
524 :     map { $_ => $args{ $_ } } grep { $tree_opt{canonical_key($_)} } keys %args;
525 :     }
526 :    
527 :    
528 :     #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
529 :     # canonical_key -- canonical form of a key for accepting variants with
530 :     # alternaive case, underscores or terminal s.
531 :     #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
532 :     sub canonical_key { my $key = lc shift; $key =~ s/_//g; $key =~ s/s$//; $key }
533 :    
534 :    
535 :     #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
536 :     # informative_sites -- positions that are useful for assessing
537 :     #
538 :     # $sites = informative_sites( $seq )
539 :     #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
540 :     sub informative_sites
541 :     {
542 :     my $seq = uc shift;
543 :     $seq =~ s/[^ACDEFGHIKLMNPQRSTUVWY]+//g;
544 :     my $nt = $seq =~ tr/ACGNTU//;
545 :     ( $nt >= 0.8 * length $seq ) ? $seq =~ tr/ACGTU//
546 :     : $seq =~ tr/ACDEFGHIKLMNPQRSTVWY//
547 : overbeek 1.1 }
548 :    
549 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3