[Bio] / FigKernelPackages / proml.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/proml.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (view) (download) (as text)

1 : overbeek 1.1 package proml;
2 :    
3 :     #===============================================================================
4 :     # A perl interface to the proml program in the PHYLIP program package
5 :     #
6 :     # @tree_likelihood_pairs = proml( \@alignment, \%options )
7 :     # @tree_likelihood_pairs = proml( \@alignment, %options )
8 :     # @tree_likelihood_pairs = proml( \%options ) # alignment must be included as option
9 :     # @tree_likelihood_pairs = proml( %options ) # alignment must be included as option
10 :     #
11 :     # @alignment = array of id_seq pairs, or id_definition_seq triples
12 :     #
13 :     #-------------------------------------------------------------------------------
14 :     # A perl interface for using proml to estimate site-specific rates of change
15 :     #
16 :     # ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )
17 :     #
18 : golsen 1.5 # $categories = [ [ $rate1, ... ], $site_categories ];
19 : overbeek 1.1 #
20 :     #===============================================================================
21 :     #
22 :     # A perl interface to the proml program in the PHYLIP program package
23 :     #
24 :     # @tree_likelihood_pairs = proml( \@alignment, \%options )
25 :     # @tree_likelihood_pairs = proml( \@alignment, %options )
26 :     # @tree_likelihood_pairs = proml( \%options ) # alignment must be included as option
27 :     # @tree_likelihood_pairs = proml( %options ) # alignment must be included as option
28 :     #
29 :     # @alignment = array of id_seq pairs, or id_definition_seq triples
30 :     #
31 :     # options:
32 :     #
33 :     # For proml:
34 :     # alignment => \@alignment the way to supply the alignment as an option, rather than first param
35 :     # alpha => float alpha parameter of gamma distribution (0.5 - inf)
36 : golsen 1.5 # categories => [ [ rate1, ... ], site_categories ]
37 : overbeek 1.1 # coef_of_var => float 1/sqrt(alpha) for gamma distribution (D = 0)
38 :     # gamma_bins => int number of rate categories used to approximate gamma (D=5)
39 :     # global => bool global rearrangements
40 :     # invar_frac => 0 - 1 fraction of site that are invariant
41 :     # jumble_seed => odd int jumble random seed
42 :     # model => model evolution model JTT (D) | PMB | PAM
43 :     # n_jumble => int number of jumbles
44 :     # persistance => float persistance length of rate category
45 : golsen 1.5 # rate_hmm => [ [ rate, prior_prob ] ... ] # not implimented
46 : overbeek 1.1 # rearrange => [ trees ] rearrange user trees
47 :     # slow => bool more accurate but slower search (D = 0)
48 :     # user_lengths => bool use supplied branch lengths
49 :     # user_trees => [ trees ] user trees
50 :     # weights => site_weights
51 :     #
52 :     # Other:
53 :     # keep_duplicates => bool do not remove duplicate sequences (D = false) [NOT IMPLIMENTED]
54 :     # program => program allows fully defined path
55 :     # tmp => directory directory for tmp_dir (D = /tmp or .)
56 :     # tmp_dir => directory directory for temporary files (D = $tmp/proml.$$)
57 :     # tree_format => overbeek | gjo | fig format of output tree
58 :     #
59 :     # tmp_dir is created and deleted unless its name is supplied, and it already
60 :     # exists.
61 :     #
62 :     #
63 :     # Options that do not require other data:
64 :     # G (global search toggle)
65 :     # L (user lengths toggle)
66 : golsen 1.5 # P (JTT / PMB / PAM cycle)
67 : overbeek 1.1 # S (slow and accurate)
68 :     # U (requires intree file)
69 :     # W (requires weights file)
70 :     #
71 :     # Some option data input orders:
72 :     #
73 :     # J
74 :     # Seed
75 :     # N reps
76 :     # Y
77 :     #
78 :     # R
79 :     # Y
80 :     # Coefficient of variation
81 :     # Rate categories
82 :     # Spurious random seed
83 :     #
84 :     # R
85 :     # R
86 :     # Y
87 :     # Coefficient of variation
88 :     # Gamma rate categories + 1
89 :     # Fraction invariant
90 :     # Spurious random seed
91 :     #
92 :     # C (requires categories file)
93 :     # N cat
94 :     # Rate values (n of them)
95 :    
96 :    
97 : golsen 1.8 use Data::Dumper;
98 :    
99 : overbeek 1.1 use strict;
100 :     use gjonewicklib qw( gjonewick_to_overbeek
101 :     newick_is_unrooted
102 :     newick_relabel_nodes
103 : golsen 1.5 newick_rescale_branches
104 : overbeek 1.1 newick_tree_length
105 :     overbeek_to_gjonewick
106 :     parse_newick_tree_str
107 :     strNewickTree
108 :     uproot_newick
109 :     );
110 :    
111 :     sub proml
112 :     {
113 :     my $align;
114 :     if ( ref( $_[0] ) eq 'ARRAY' )
115 :     {
116 :     $align = shift @_;
117 :     ( $align && ( ref( $align ) eq 'ARRAY' ) )
118 :     || ( ( print STDERR "proml::proml() called without alignment\n" )
119 :     && ( return () )
120 :     );
121 :     }
122 :    
123 :     my %options;
124 :     if ( $_[0] )
125 :     {
126 :     %options = ( ref( $_[0]) eq 'HASH' ) ? %{ $_[0] } : @_;
127 :     }
128 :    
129 :     #---------------------------------------------------------------------------
130 :     # Work on a copy of the alignment. Id is always first, seq is always last
131 :     #---------------------------------------------------------------------------
132 :    
133 :     $align ||= $options{ alignment } || $options{ align };
134 :    
135 :     my ( $seq, $id );
136 :     my %id;
137 :     my %local_id;
138 :     my $local_id = 'seq0000000';
139 :     my @align = map { $id = $_->[0];
140 :     $local_id++;
141 :     $id{ $local_id } = $id;
142 :     $local_id{ $id } = $local_id;
143 :     $seq = $_->[-1];
144 :     $seq =~ s/[BJOUZ]/X/gi; # Bad letters go to X
145 :     $seq =~ s/[^A-Z]/-/gi; # Anything else becomes -
146 :     [ $local_id, $seq ]
147 :     } @$align;
148 :    
149 :     #---------------------------------------------------------------------------
150 :     # Process proml options:
151 :     #---------------------------------------------------------------------------
152 :    
153 : golsen 1.5 # [ [ cat_rate1, ... ], site_categories ]
154 :     # Original format expected first field to be number of categories (which
155 :     # is redundant). Handling that form is what the shift if all about.
156 :    
157 :     my $categories = $options{ categories }; # [ [ cat_rate1, ... ], site_categories ]
158 : overbeek 1.1 if ( $categories )
159 :     {
160 :     if ( ref( $categories ) ne 'ARRAY'
161 : golsen 1.5 || ! ( ( @$categories == 2 ) || ( ( @$categories == 3 ) && ( shift @$categories ) ) )
162 :     || ref( $categories->[0] ) ne 'ARRAY'
163 : overbeek 1.1 )
164 :     {
165 : golsen 1.9 print STDERR "proml::proml() categories option value must be [ [ cat_rate1, ... ], site_categories ]\n";
166 : overbeek 1.1 return ();
167 :     }
168 :    
169 :     # Rate values cannot have very many decimal places or proml can't read it:
170 :    
171 : golsen 1.5 @{$categories->[0]} = map { sprintf "%.6f", $_ } @{$categories->[0]};
172 : overbeek 1.1 }
173 :    
174 :     my $coef_of_var = $options{ coef_of_var }
175 :     || ( $options{ alpha } && ( $options{ alpha } > 0) && ( 1 / sqrt( $options{ alpha } ) ) )
176 :     || 0;
177 :     if ( $coef_of_var < 0 )
178 :     {
179 : golsen 1.9 print STDERR "proml::proml() coef_of_var option value must be >= 0\n";
180 : overbeek 1.1 return ();
181 :     }
182 :    
183 :     my $gamma_bins = int( $options{ gamma_bins } || ( $coef_of_var ? 5 : 2 ) );
184 :     if ( ( $gamma_bins < 2 ) || ( $gamma_bins > 9 ) )
185 :     {
186 : golsen 1.9 print STDERR "proml::proml() gamma_bins option value must be > 1 and <= 9\n";
187 : overbeek 1.1 return ();
188 :     }
189 :    
190 :     my $global = $options{ global } || 0;
191 :    
192 :     my $invar_frac = $options{ invar_frac } || 0;
193 :     if ( $invar_frac && ( $invar_frac < 0 || $invar_frac >= 1 ) )
194 :     {
195 : golsen 1.9 print STDERR "proml::proml() invar_frac option value must be >= 0 and < 1\n";
196 : overbeek 1.1 return ();
197 :     }
198 :    
199 :     my $n_jumble = int( $options{ n_jumble } || ( $options{ jumble_seed } ? 1 : 0) );
200 :     if ( $n_jumble < 0 )
201 :     {
202 : golsen 1.9 print STDERR "proml::proml() n_jumble option value must be >= 0\n";
203 : overbeek 1.1 return ();
204 :     }
205 : golsen 1.5
206 : overbeek 1.1 my $jumble_seed = int( $options{ jumble_seed } || 4 * int( 499999999 * rand() ) + 1 );
207 :     if ( ( $jumble_seed <= 0) || ( $jumble_seed % 2 != 1 ) )
208 :     {
209 : golsen 1.9 print STDERR "proml::proml() jumble_seed option value must be an odd number > 0\n";
210 : overbeek 1.1 return ();
211 :     }
212 :    
213 :     my $model = ( $options{ model } =~ m/PAM/i ) ? 'PAM'
214 :     : ( $options{ model } =~ m/Dayhoff/i ) ? 'PAM'
215 :     : ( $options{ model } =~ m/PMB/i ) ? 'PMB'
216 :     : ( $options{ model } =~ m/Henikoff/i ) ? 'PMB'
217 :     : ( $options{ model } =~ m/Tillier/i ) ? 'PMB'
218 :     : ( $options{ model } =~ m/JTT/i ) ? 'JTT'
219 :     : ( $options{ model } =~ m/Jones/i ) ? 'JTT'
220 :     : ( $options{ model } =~ m/Taylor/i ) ? 'JTT'
221 :     : ( $options{ model } =~ m/Thornton/i ) ? 'JTT'
222 :     : 'JTT';
223 :    
224 :     my $persistance = $options{ persistance } || 0;
225 :     if ( $persistance && ( $persistance <= 1 ) )
226 :     {
227 : golsen 1.9 print STDERR "proml::proml() persistance option value must be > 1\n";
228 : overbeek 1.1 return ();
229 :     }
230 : golsen 1.5
231 : overbeek 1.1 my $rearrange = $options{ rearrange };
232 :    
233 :     my $slow = $options{ slow };
234 :    
235 :     my $user_lengths = $options{ user_lengths };
236 :    
237 :     my $user_trees = $options{ user_trees } || $rearrange;
238 :    
239 :     if ( $user_trees )
240 :     {
241 :     if ( ( ref( $user_trees ) ne 'ARRAY' ) || ( ! @$user_trees ) )
242 :     {
243 :     $user_trees = undef; # No trees
244 :     }
245 :     elsif ( ref( $user_trees->[0] ) ne 'ARRAY' ) # First element not tree
246 :     {
247 : golsen 1.9 print STDERR "proml::proml() user_trees or rearrange option value must be reference to list of trees\n";
248 : overbeek 1.1 return ();
249 :     }
250 :     }
251 :    
252 :     my $weights = $options{ weights };
253 : golsen 1.5
254 : overbeek 1.1 #---------------------------------------------------------------------------
255 :     # Options that are not proml options per se:
256 :     #---------------------------------------------------------------------------
257 :    
258 : golsen 1.5 my $program = $options{ program } || 'proml';
259 : overbeek 1.1
260 :     my $tree_format = $options{ tree_format } =~ m/overbeek/i ? 'overbeek'
261 :     : $options{ tree_format } =~ m/gjo/i ? 'gjonewick'
262 :     : $options{ tree_format } =~ m/fig/i ? 'fig'
263 :     : 'overbeek'; # Default
264 :    
265 : golsen 1.9 my ( $tmp_dir, $save_tmp ) = temporary_directory( \%options );
266 : overbeek 1.1
267 :     #---------------------------------------------------------------------------
268 :     # Prepare data:
269 :     #---------------------------------------------------------------------------
270 :     #
271 :     # For simplicity, we will convert overbeek trees to gjo newick trees.
272 :     #
273 :     # gjonewick tree node: [ \@desc, $label, $x, \@c1, \@c2, \@c3, \@c4, \@c5 ]
274 :     #
275 :     # overbeek tree node: [ Label, DistanceToParent,
276 :     # [ ParentPointer, ChildPointer1, ... ],
277 :     # [ Name1\tVal1, Name2\tVal2, ... ]
278 :     # ]
279 :     # Root node of gjonewick always has a descendent list. If the first
280 :     # field of the first tree is not an array reference, they are overbeek
281 : golsen 1.8 # trees.
282 : overbeek 1.1
283 :     my @user_trees = ();
284 : golsen 1.8 if ( @$user_trees )
285 : overbeek 1.1 {
286 : golsen 1.8 if ( ref( @$user_trees[0]->[0] ) ne 'ARRAY' ) # overbeek trees
287 : overbeek 1.1 {
288 : golsen 1.8 @user_trees = map { gjonewicklib::overbeek_to_gjonewick( $_ ) }
289 : overbeek 1.1 @$user_trees;
290 :     }
291 :     else
292 :     {
293 : golsen 1.8 @user_trees = map { gjonewicklib::copy_newick_tree( $_ ) }
294 : overbeek 1.1 @$user_trees;
295 :     }
296 :    
297 : golsen 1.8 # Relabel and make sure trees are unrooted:
298 : overbeek 1.1
299 :     @user_trees = map { gjonewicklib::newick_is_unrooted( $_ ) ? $_
300 :     : gjonewicklib::uproot_newick( $_ )
301 :     }
302 : golsen 1.8 map { gjonewicklib::newick_relabel_nodes( $_, \%local_id ); $_ }
303 : overbeek 1.1 @user_trees;
304 :     }
305 :    
306 :     #---------------------------------------------------------------------------
307 :     # Write the files and run the program:
308 :     #---------------------------------------------------------------------------
309 :    
310 :     my $cwd = $ENV{ cwd } || `pwd`;
311 :     chomp $cwd;
312 :     chdir $tmp_dir;
313 :    
314 :     unlink 'outfile' if -f 'outfile'; # Just checking
315 :     unlink 'outtree' if -f 'outtree'; # ditto
316 :    
317 : golsen 1.6 &write_infile( @align ) or print STDERR "proml::proml: Could not write infile\n"
318 : overbeek 1.1 and chdir $cwd
319 :     and return ();
320 :    
321 : golsen 1.5 open( PROML, ">params" ) or print STDERR "proml::proml: Could not open command file for $program\n"
322 :     and chdir $cwd
323 :     and return ();
324 :    
325 : overbeek 1.1
326 : golsen 1.6 # Start writing options for program:
327 : overbeek 1.1
328 :     if ( $categories )
329 :     {
330 : golsen 1.5 &write_categories( $categories->[1] ) or print STDERR "proml::proml: Could not write categories\n"
331 : overbeek 1.1 and chdir $cwd
332 :     and return ();
333 :     print PROML "C\n",
334 : golsen 1.5 scalar @{$categories->[0]}, "\n",
335 :     join( ' ', @{ $categories->[0] } ), "\n";
336 : overbeek 1.1 }
337 :    
338 :     if ( $invar_frac || $coef_of_var )
339 :     {
340 :     print PROML "R\n";
341 :     print PROML "R\n" if $invar_frac;
342 :     print PROML "A\n", "$persistance\n" if $persistance;
343 : golsen 1.5
344 : overbeek 1.1 }
345 :    
346 :     print PROML "G\n" if $global;
347 :    
348 :     print PROML "J\n", "$jumble_seed\n", "$n_jumble\n" if $n_jumble;
349 :    
350 :     print PROML "P\n" if $model =~ m/PMB/i;
351 :     print PROML "P\nP\n" if $model =~ m/PAM/i;
352 :    
353 :     if ( @user_trees )
354 :     {
355 : golsen 1.6 &write_intree( @user_trees ) or print STDERR "proml::proml: Could not write intree\n"
356 : overbeek 1.1 and chdir $cwd
357 :     and return ();
358 :     print PROML "U\n";
359 :     print PROML "V\n" if $rearrange || $global;
360 :     print PROML "L\n" if $user_lengths && ! $rearrange && ! $global;
361 :     }
362 : golsen 1.7 elsif ( $slow ) # Slow and user trees are mutually exclusive
363 :     {
364 :     print PROML "S\n";
365 :     }
366 : overbeek 1.1
367 :     if ( $weights )
368 :     {
369 : golsen 1.5 &write_weights( $weights ) or print STDERR "proml::proml: Could not write weights\n"
370 : overbeek 1.1 and chdir $cwd
371 :     and return ();
372 :     print PROML "W\n";
373 :     }
374 :    
375 : golsen 1.7 # All the options are written, try to launch the run:
376 : overbeek 1.1
377 :     print PROML "Y\n";
378 :    
379 :     # Becuase of the options interface, these values have to be supplied after
380 :     # the Y:
381 :    
382 :     if ( $invar_frac || $coef_of_var )
383 :     {
384 :     if ( $invar_frac )
385 :     {
386 :     if ( $coef_of_var ) { $gamma_bins++ if ( $gamma_bins < 9 ) }
387 :     else { $gamma_bins = 2 }
388 :     }
389 :     print PROML "$coef_of_var\n";
390 :     print PROML "$gamma_bins\n";
391 : golsen 1.7 print PROML "$invar_frac\n" if $invar_frac;
392 : overbeek 1.1 }
393 : golsen 1.7
394 :     if ( $user_trees )
395 : overbeek 1.1 {
396 :     print PROML "13\n"; # Random number seed of unknown use
397 :     }
398 :    
399 :     close PROML;
400 : golsen 1.5
401 : overbeek 1.1 system "$program < params > /dev/null";
402 :    
403 :     my @likelihoods = &read_outfile();
404 :    
405 :     my @trees = gjonewicklib::read_newick_trees( 'outtree' );
406 : golsen 1.8 @trees or print STDERR "proml::proml: Could not read proml outtree file\n"
407 : overbeek 1.1 and chdir $cwd
408 :     and return ();
409 :    
410 :     # We are done, go back to the original directory:
411 :    
412 :     chdir $cwd;
413 :    
414 :     # Returned trees have our labels, and branch lengths that are in % change,
415 :     # not the more usual expected number per position:
416 :    
417 : golsen 1.9 @trees = map { gjonewicklib::newick_relabel_nodes( $_, \%id ) } @trees;
418 : overbeek 1.1
419 :     if ( $tree_format =~ m/overbeek/i )
420 :     {
421 :     @trees = map { gjonewicklib::gjonewick_to_overbeek( $_ ) } @trees;
422 :     }
423 :    
424 :     system "/bin/rm -r $tmp_dir" if ! $save_tmp;
425 :    
426 :     return map { [ $_, shift @likelihoods ] } @trees;
427 :     }
428 :    
429 :    
430 :     #-------------------------------------------------------------------------------
431 :     # A perl interface for using proml to estimate site-specific rates of change
432 :     #
433 : golsen 1.6 # ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, %proml_opts )
434 :     # ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, \%proml_opts )
435 : overbeek 1.1 #
436 : golsen 1.5 # $categories = [ [ $rate1, ... ], $site_categories ];
437 : overbeek 1.1 #
438 :     # $alignment = [ [ id, def, seq ], ... ]
439 :     # or
440 :     # [ [ id, seq ], ... ]
441 :     #
442 :     # $tree = overbeek tree or gjonewick tree
443 :     #
444 :     # proml_opts is list of key value pairs, or reference to a hash
445 :     #-------------------------------------------------------------------------------
446 :    
447 :     sub estimate_protein_site_rates
448 :     {
449 :     my ( $align, $tree, @proml_opts ) = @_;
450 :    
451 : golsen 1.5 my ( $seq, $id );
452 :     my %local_id;
453 :     my $local_id = 'seq0000000';
454 :     my @align = map { $id = $_->[0];
455 :     $local_id{ $id } = ++$local_id;
456 :     $seq = $_->[-1];
457 :     $seq =~ s/[BJOUZ]/X/gi; # Bad letters go to X
458 :     $seq =~ s/[^A-Z]/-/gi; # Anything else becomes -
459 :     [ $local_id, $seq ]
460 :     } @$align;
461 : overbeek 1.1
462 : golsen 1.5 # Make the tree a gjonewick tree, uproot it, and change to the local ids.
463 : overbeek 1.1
464 :     if ( ref( $tree->[0] ) ne 'ARRAY' ) # overbeek tree
465 :     {
466 :     $tree = gjonewicklib::overbeek_to_gjonewick( $tree );
467 :     }
468 : golsen 1.5 else
469 :     {
470 :     $tree = gjonewicklib::copy_newick_tree( $tree );
471 :     }
472 :    
473 :     $tree = gjonewicklib::uproot_newick( $tree ) if ! gjonewicklib::newick_is_unrooted( $tree );
474 :    
475 :     gjonewicklib::newick_relabel_nodes( $tree, \%local_id );
476 : overbeek 1.1
477 :     # The minimum rate will be 1/2 change per total tree branch length.
478 :     # This needs to be checked for proml. The intent is that he optimal
479 :     # rate for a site with one amino acid change is twice this value.
480 :    
481 :     my $kmin = 1 / ( gjonewicklib::newick_tree_length( $tree ) || 1 );
482 :    
483 :     # Generate "rate variation" by rescaling the supplied tree. We could use a
484 :     # finer grain estimator, then categorize the inferred values. This might
485 :     # work slightly better (this is what DNArates currently does).
486 :    
487 :     my $f = exp( log( 2 ) / 1 ); # Interval of 2
488 :     my @rates = map { $kmin * $f**$_ } ( 0 .. 16 ); # kmin .. 65000 * kmin in 17 bins
489 :     my @cat_vals = ( 1 .. 17 );
490 :     my @trees;
491 :     my $rate;
492 :     foreach $rate ( @rates )
493 :     {
494 :     my $tr = gjonewicklib::copy_newick_tree( $tree );
495 :     gjonewicklib::newick_rescale_branches( $tr, $rate ); # Rescales in place
496 :     push @trees, $tr;
497 :     }
498 :    
499 :     # Adjust (a copy of) the proml opts:
500 :    
501 :     my %proml_opts = ( ref( $proml_opts[0] ) eq 'HASH' ) ? %{ $proml_opts[0] } : @proml_opts;
502 : golsen 1.6
503 : overbeek 1.1 $proml_opts{ user_lengths } = 1;
504 :     $proml_opts{ user_trees } = \@trees;
505 :     $proml_opts{ tree_format } = 'gjo';
506 :    
507 : golsen 1.6 delete $proml_opts{ alpha } if exists $proml_opts{ alpha };
508 :     delete $proml_opts{ categories } if exists $proml_opts{ categories };
509 :     delete $proml_opts{ coef_of_var } if exists $proml_opts{ coef_of_var };
510 :     delete $proml_opts{ gamma_bins } if exists $proml_opts{ gamma_bins };
511 :     delete $proml_opts{ invar_frac } if exists $proml_opts{ invar_frac };
512 :     delete $proml_opts{ jumble_seed } if exists $proml_opts{ jumble_seed };
513 :     delete $proml_opts{ n_jumble } if exists $proml_opts{ n_jumble };
514 :     delete $proml_opts{ rearrange } if exists $proml_opts{ rearrange };
515 :    
516 :     # Work throught the sites, finding their optimal rates/categories:
517 : overbeek 1.1
518 :     my @categories;
519 :     my @weights;
520 :     my $imax = length( $align[0]->[-1] );
521 :     for ( my $i = 0; $i < $imax; $i++ )
522 :     {
523 :     my $inform = 0;
524 :     my @align2 = map { my $c = substr( $_->[-1], $i, 1 );
525 : golsen 1.5 $inform++ if ( $c =~ m/[ACDEFGHIKLMNPQRSTVWY]/i );
526 : overbeek 1.1 [ $_->[0], $c ]
527 :     }
528 :     @align;
529 :    
530 :     # Only analyze the rate if there are 4 or more informative sequences:
531 :    
532 :     if ( $inform >= 4 )
533 :     {
534 :     my @results = proml::proml( \@align2, \%proml_opts );
535 :    
536 :     my ( $best ) = sort { $b->[1] <=> $a->[1] }
537 :     map { [ $_, @{ shift @results }[1] ] } # get the likelihoods
538 :     @cat_vals;
539 :    
540 : overbeek 1.3 # printf STDERR "%6d %2d => %12.4f\n", $i+1, @$best; ## DEBUG ##
541 : overbeek 1.1 push @categories, $best->[0];
542 :     push @weights, 1;
543 :     }
544 :     else
545 :     {
546 :     push @categories, 9;
547 :     push @weights, 0;
548 :     }
549 :     }
550 :    
551 :     # Find the minimum category value to appear:
552 :    
553 :     my ( $mincat ) = sort { $a <=> $b } @categories;
554 :     my $adjust = $mincat - 1;
555 :    
556 :     @categories = map { min( $_ - $adjust, 9 ) } @categories;
557 :     @rates = @rates[ $adjust .. ( $adjust+8 ) ];
558 :    
559 :     # Return category and weight data:
560 :    
561 : golsen 1.5 ( [ \@rates, join( '', @categories ) ], join( '', @weights ) )
562 : overbeek 1.1 }
563 :    
564 :    
565 :     #-------------------------------------------------------------------------------
566 :     # Auxiliary functions:
567 :     #-------------------------------------------------------------------------------
568 :    
569 : golsen 1.9 sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
570 :    
571 :    
572 : overbeek 1.1 sub write_infile
573 :     {
574 :     open( INFILE, '>infile' ) or return 0;
575 :     print INFILE scalar @_, ' ', length( $_[0]->[1] ), "\n";
576 :     foreach ( @_ ) { printf INFILE "%-10s %s\n", @$_ }
577 :     close( INFILE );
578 :     }
579 :    
580 :    
581 :     sub write_intree
582 :     {
583 :     open( INTREE, '>intree' ) or return 0;
584 :     print INTREE scalar @_, "\n";
585 :     foreach ( @_ ) { print INTREE gjonewicklib::strNewickTree( $_ ), "\n" }
586 :     close( INTREE );
587 :     }
588 :    
589 :    
590 :     sub write_categories
591 :     {
592 :     my $categories = shift;
593 :     open( CATEGORIES, '>categories' ) or return 0;
594 :     print CATEGORIES "$categories\n";
595 :     close( CATEGORIES );
596 :     }
597 :    
598 :    
599 :     sub write_weights
600 :     {
601 :     my $weights = shift;
602 :     open( WEIGHTS, '>weights' ) or return 0;
603 :     print WEIGHTS "$weights\n";
604 :     close( WEIGHTS );
605 :     }
606 :    
607 :    
608 :     sub read_outfile
609 :     {
610 :     open( OUTFILE, '<outfile' ) or return ();
611 : golsen 1.5 my @likelihoods = map { chomp; s/.* //; $_ }
612 :     grep { /^Ln Likelihood/ }
613 :     <OUTFILE>;
614 : overbeek 1.1 close( OUTFILE );
615 :     return @likelihoods;
616 :     }
617 :    
618 :    
619 : golsen 1.9 #-------------------------------------------------------------------------------
620 :     # The SEED has things in special places. Be aware of them if running SEED.
621 :     #-------------------------------------------------------------------------------
622 :    
623 :     my $tmp; # The default place for temp files or directories
624 :     my $ext_bin; # FIG path to external binaries
625 :    
626 :     eval { require FIG_Config;
627 :     $tmp = $FIG_Config::temp;
628 :     $ext_bin = $FIG_Config::ext_bin;
629 :     };
630 :    
631 :    
632 :     #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
633 :     # $program = executable( $program, \%options )
634 :     #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
635 :     sub executable
636 :     {
637 :     my ( $program, $options ) = @_;
638 :     return undef if ! $program;
639 :    
640 :     $options ||= {};
641 :    
642 :     return $options->{ $program } ? $options->{ $program } # explicit?
643 :     : $ext_bin ? "$ext_bin/$program" # SEED?
644 :     : $program;
645 :     }
646 :    
647 :    
648 :     #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
649 :     # Find or create a temporary directory.
650 :     # If it does not exist, create it. If it exists, mark it for saving.
651 :     #
652 :     # $tmp_dir = temporary_directory( \%options )
653 :     # ( $tmp_dir, $save_tmp ) = temporary_directory( \%options )
654 :     #
655 :     #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
656 :    
657 :     sub temporary_directory
658 :     {
659 :     my $options = shift || {};
660 :    
661 :     my $tmp_dir = $options->{ tmpdir } || $options->{ tmp_dir };
662 :     my $save_tmp = $options->{ savetmp } || $options->{ save_tmp } || '';
663 :    
664 :     if ( $tmp_dir )
665 :     {
666 :     if ( -d $tmp_dir ) { $options->{ savetmp } = $save_tmp = 1 }
667 :     }
668 :     else
669 :     {
670 :     if ( $options->{ tmp } && -d $options->{ tmp } )
671 :     {
672 :     $tmp = $options->{ tmp };
673 :     }
674 :     elsif ( ! $tmp || ! -d $tmp )
675 :     {
676 :     $options->{ tmp } = $tmp = -d '/tmp' ? '/tmp' : '.';
677 :     }
678 :    
679 :     $tmp_dir = sprintf( "$tmp/" . __PACKAGE__ . "_tmp.%05d.%09d", $$, int(1000000000*rand) );
680 :     $options->{ tmpdir } = $tmp_dir;
681 :     }
682 :    
683 :     if ( $tmp_dir && ! -d $tmp_dir )
684 :     {
685 :     mkdir $tmp_dir;
686 :     if ( ! -d $tmp_dir )
687 :     {
688 :     print STDERR __PACKAGE__ . "::temporary_directory could not create '$tmp_dir'\n";
689 :     $options->{ tmpdir } = $tmp_dir = undef;
690 :     }
691 :     }
692 :    
693 :     return wantarray ? ( $tmp_dir, $save_tmp ) : $tmp_dir;
694 :     }
695 :    
696 :    
697 : overbeek 1.1 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3