[Bio] / FigKernelPackages / proml.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/proml.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (view) (download) (as text)

1 : overbeek 1.1 package proml;
2 :    
3 :     #===============================================================================
4 :     # A perl interface to the proml program in the PHYLIP program package
5 :     #
6 :     # @tree_likelihood_pairs = proml( \@alignment, \%options )
7 :     # @tree_likelihood_pairs = proml( \@alignment, %options )
8 :     # @tree_likelihood_pairs = proml( \%options ) # alignment must be included as option
9 :     # @tree_likelihood_pairs = proml( %options ) # alignment must be included as option
10 :     #
11 :     # @alignment = array of id_seq pairs, or id_definition_seq triples
12 :     #
13 :     #-------------------------------------------------------------------------------
14 :     # A perl interface for using proml to estimate site-specific rates of change
15 :     #
16 :     # ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )
17 :     #
18 : golsen 1.5 # $categories = [ [ $rate1, ... ], $site_categories ];
19 : overbeek 1.1 #
20 :     #===============================================================================
21 :     #
22 :     # A perl interface to the proml program in the PHYLIP program package
23 :     #
24 :     # @tree_likelihood_pairs = proml( \@alignment, \%options )
25 :     # @tree_likelihood_pairs = proml( \@alignment, %options )
26 :     # @tree_likelihood_pairs = proml( \%options ) # alignment must be included as option
27 :     # @tree_likelihood_pairs = proml( %options ) # alignment must be included as option
28 :     #
29 :     # @alignment = array of id_seq pairs, or id_definition_seq triples
30 :     #
31 :     # options:
32 :     #
33 :     # For proml:
34 :     # alignment => \@alignment the way to supply the alignment as an option, rather than first param
35 :     # alpha => float alpha parameter of gamma distribution (0.5 - inf)
36 : golsen 1.5 # categories => [ [ rate1, ... ], site_categories ]
37 : overbeek 1.1 # coef_of_var => float 1/sqrt(alpha) for gamma distribution (D = 0)
38 :     # gamma_bins => int number of rate categories used to approximate gamma (D=5)
39 :     # global => bool global rearrangements
40 :     # invar_frac => 0 - 1 fraction of sites that are invariant
41 :     # jumble_seed => odd int jumble random seed
42 :     # model => model evolution model JTT (D) | PMB | PAM
43 :     # n_jumble => int number of jumbles
44 :     # persistance => float persistance length of rate category
45 : golsen 1.5 # rate_hmm => [ [ rate, prior_prob ] ... ] # not implemented
46 : overbeek 1.1 # rearrange => [ trees ] rearrange user trees
47 :     # slow => bool more accurate but slower search (D = 0)
48 :     # user_lengths => bool use supplied branch lengths
49 :     # user_trees => [ trees ] user trees
50 :     # weights => site_weights
51 :     #
52 :     # Other:
53 :     # keep_duplicates => bool do not remove duplicate sequences (D = false) [NOT IMPLEMENTED]
54 :     # program => program allows fully defined path
55 :     # tmp => directory directory for tmp_dir (D = /tmp or .)
56 :     # tmp_dir => directory directory for temporary files (D = $tmp/proml.$$)
57 :     # tree_format => overbeek | gjo | fig format of output tree
58 :     #
59 :     # tmp_dir is created and deleted unless its name is supplied, and it already
60 :     # exists.
61 :     #
62 :     #
63 :     # Options that do not require other data:
64 :     # G (global search toggle)
65 :     # L (user lengths toggle)
66 : golsen 1.5 # P (JTT / PMB / PAM cycle)
67 : overbeek 1.1 # S (slow and accurate)
68 :     # U (requires intree file)
69 :     # W (requires weights file)
70 :     #
71 :     # Some option data input orders:
72 :     #
73 :     # J
74 :     # Seed
75 :     # N reps
76 :     # Y
77 :     #
78 :     # R
79 :     # Y
80 :     # Coefficient of variation
81 :     # Rate categories
82 :     # Spurious random seed
83 :     #
84 :     # R
85 :     # R
86 :     # Y
87 :     # Coefficient of variation
88 :     # Gamma rate categories + 1
89 :     # Fraction invariant
90 :     # Spurious random seed
91 :     #
92 :     # C (requires categories file)
93 :     # N cat
94 :     # Rate values (n of them)
95 :    
96 :    
97 :     use strict;
98 :     use gjonewicklib qw( gjonewick_to_overbeek
99 :     newick_is_unrooted
100 :     newick_relabel_nodes
101 : golsen 1.5 newick_rescale_branches
102 : overbeek 1.1 newick_tree_length
103 :     overbeek_to_gjonewick
104 :     parse_newick_tree_str
105 :     strNewickTree
106 :     uproot_newick
107 :     );
108 :    
109 :    
#-------------------------------------------------------------------------------
#  Run the PHYLIP proml program on a protein alignment.
#
#     @tree_likelihood_pairs = proml( \@alignment, \%options )
#     @tree_likelihood_pairs = proml( \@alignment,  %options )
#     @tree_likelihood_pairs = proml( \%options )   # alignment supplied as option
#     @tree_likelihood_pairs = proml(  %options )   # alignment supplied as option
#
#  @alignment is a list of [ id, seq ] pairs or [ id, definition, seq ] triples;
#  the id is always taken from the first field and the sequence from the last.
#
#  Returns a list of [ $tree, $ln_likelihood ] pairs, one per tree that proml
#  wrote to its outtree file.  On any failure a message is printed to STDERR
#  and the empty list is returned.
#
#  The caller's alignment, categories and user trees are not modified.  The
#  temporary directory is removed on every exit path unless it was supplied
#  by the caller and already existed.
#-------------------------------------------------------------------------------
sub proml
{
    my $align;
    $align = shift @_ if ref( $_[0] ) eq 'ARRAY';

    my %options;
    if ( $_[0] )
    {
        %options = ( ref( $_[0] ) eq 'HASH' ) ? %{ $_[0] } : @_;
    }

    $align ||= $options{ alignment } || $options{ align };
    if ( ( ref( $align ) ne 'ARRAY' ) || ! @$align )
    {
        print STDERR "proml::proml() called without alignment\n";
        return ();
    }

    #---------------------------------------------------------------------------
    #  Work on a copy of the alignment.  Sequences get short local ids that
    #  proml can handle; %id maps local id back to the caller's id, and
    #  %local_id maps the caller's id to the local one.
    #---------------------------------------------------------------------------

    my %id;
    my %local_id;
    my $local_id = 'seq0000000';
    my @align;
    foreach my $entry ( @$align )
    {
        my $id  = $entry->[0];
        my $seq = $entry->[-1];
        $seq =~ s/[BJOUZ]/X/gi;                 # Bad letters go to X
        $seq =~ s/[^A-Z]/-/gi;                  # Anything else becomes -
        $local_id++;                            # String increment: seq0000001, ...
        $id{ $local_id } = $id;
        $local_id{ $id } = $local_id;
        push @align, [ $local_id, $seq ];
    }

    #---------------------------------------------------------------------------
    #  Process proml options:
    #---------------------------------------------------------------------------

    #  categories => [ [ cat_rate1, ... ], site_categories ]
    #
    #  The original format had a leading (redundant) number of categories.
    #  That three-element form is still accepted, provided the count is true.
    #  A private copy is built so the caller's structure is left untouched.

    my $categories = $options{ categories };
    if ( $categories )
    {
        my @cat       = ( ref( $categories ) eq 'ARRAY' ) ? @$categories : ();
        my $legacy_ok = ( @cat == 3 ) ? shift( @cat ) : 1;

        if ( ! $legacy_ok || ( @cat != 2 ) || ( ref( $cat[0] ) ne 'ARRAY' ) )
        {
            print STDERR "proml::proml categories option value must be [ [ cat_rate1, ... ], site_categories ]\n";
            return ();
        }

        #  Rate values cannot have very many decimal places or proml can't
        #  read them:

        $categories = [ [ map { sprintf "%.6f", $_ } @{ $cat[0] } ], $cat[1] ];
    }

    my $coef_of_var = $options{ coef_of_var }
                   || ( $options{ alpha } && ( $options{ alpha } > 0 ) && ( 1 / sqrt( $options{ alpha } ) ) )
                   || 0;
    if ( $coef_of_var < 0 )
    {
        print STDERR "proml::proml coef_of_var option value must be >= 0\n";
        return ();
    }

    my $gamma_bins = int( $options{ gamma_bins } || ( $coef_of_var ? 5 : 2 ) );
    if ( ( $gamma_bins < 2 ) || ( $gamma_bins > 9 ) )
    {
        print STDERR "proml::proml gamma_bins option value must be > 1 and <= 9\n";
        return ();
    }

    my $global = $options{ global } || 0;

    my $invar_frac = $options{ invar_frac } || 0;
    if ( $invar_frac && ( $invar_frac < 0 || $invar_frac >= 1 ) )
    {
        print STDERR "proml::proml invar_frac option value must be >= 0 and < 1\n";
        return ();
    }

    my $n_jumble = int( $options{ n_jumble } || ( $options{ jumble_seed } ? 1 : 0 ) );
    if ( $n_jumble < 0 )
    {
        print STDERR "proml::proml n_jumble option value must be >= 0\n";
        return ();
    }

    my $jumble_seed = int( $options{ jumble_seed } || 4 * int( 499999999 * rand() ) + 1 );
    if ( ( $jumble_seed <= 0 ) || ( $jumble_seed % 2 != 1 ) )
    {
        print STDERR "proml::proml jumble_seed option value must be an odd number > 0\n";
        return ();
    }

    #  Model names and author names are accepted; anything else is JTT.

    my $model_opt = defined( $options{ model } ) ? $options{ model } : '';
    my $model = ( $model_opt =~ m/PAM|Dayhoff/i )          ? 'PAM'
              : ( $model_opt =~ m/PMB|Henikoff|Tillier/i ) ? 'PMB'
              :                                              'JTT';

    my $persistance = $options{ persistance } || 0;
    if ( $persistance && ( $persistance <= 1 ) )
    {
        print STDERR "proml::proml persistance option value must be > 1\n";
        return ();
    }

    my $rearrange    = $options{ rearrange };
    my $slow         = $options{ slow };
    my $user_lengths = $options{ user_lengths };

    my $user_trees = $options{ user_trees } || $rearrange;
    if ( $user_trees )
    {
        if ( ( ref( $user_trees ) ne 'ARRAY' ) || ( ! @$user_trees ) )
        {
            $user_trees = undef;                            # No trees
        }
        elsif ( ref( $user_trees->[0] ) ne 'ARRAY' )        # First element not tree
        {
            print STDERR "proml::proml usertree or rearrange option value must be reference to list of trees\n";
            return ();
        }
    }

    my $weights = $options{ weights };

    #---------------------------------------------------------------------------
    #  Options that are not proml options per se:
    #---------------------------------------------------------------------------

    my $program = $options{ program } || 'proml';
    my $tmp     = $options{ tmp };
    my $tmp_dir = $options{ tmp_dir };

    my $format_opt  = defined( $options{ tree_format } ) ? $options{ tree_format } : '';
    my $tree_format = $format_opt =~ m/overbeek/i ? 'overbeek'
                    : $format_opt =~ m/gjo/i      ? 'gjonewick'
                    : $format_opt =~ m/fig/i      ? 'fig'
                    :                               'overbeek';    # Default

    #---------------------------------------------------------------------------
    #  Prepare user trees (done before any directory is created, so a failure
    #  in the tree library cannot leave a temporary directory behind):
    #---------------------------------------------------------------------------
    #
    #  For simplicity, overbeek trees are converted to gjo newick trees.
    #
    #  gjonewick tree node:  [ \@desc, $label, $x, \@c1, \@c2, \@c3, \@c4, \@c5 ]
    #
    #  overbeek tree node:   [ Label, DistanceToParent,
    #                          [ ParentPointer, ChildPointer1, ... ],
    #                          [ Name1\tVal1, Name2\tVal2, ... ]
    #                        ]
    #
    #  The root node of a gjonewick tree always has a descendant list, so if
    #  the first field of the first tree is not an array reference, the trees
    #  are overbeek trees.  Tips are relabeled with the local ids.

    my @user_trees = ();
    if ( $user_trees )
    {
        if ( ref( $user_trees->[0]->[0] ) ne 'ARRAY' )      # overbeek trees
        {
            @user_trees = map { gjonewicklib::overbeek_to_gjonewick( $_ ) } @$user_trees;
        }
        else
        {
            #  Relabeling appears to work in place (it is called in void
            #  context elsewhere in this package), so work on copies.

            @user_trees = map { gjonewicklib::copy_newick_tree( $_ ) } @$user_trees;
        }

        @user_trees = map { gjonewicklib::newick_relabel_nodes( $_, \%local_id ) } @user_trees;

        #  Make sure trees are unrooted:

        @user_trees = map { gjonewicklib::newick_is_unrooted( $_ ) ? $_
                                                                   : gjonewicklib::uproot_newick( $_ )
                          }
                      @user_trees;
    }

    #---------------------------------------------------------------------------
    #  Build the keyboard input for proml's interactive menu, one response per
    #  line.  Some option data input orders:
    #
    #     J, seed, n_reps
    #     R [R] [A, persistence] ... Y, coef_of_var, n_rate_categories [, invar_frac]
    #     C, n_cat, rate values
    #---------------------------------------------------------------------------

    my @menu;

    if ( $categories )
    {
        push @menu, 'C', scalar @{ $categories->[0] }, join( ' ', @{ $categories->[0] } );
    }

    if ( $invar_frac || $coef_of_var )
    {
        push @menu, 'R';
        push @menu, 'R' if $invar_frac;
        push @menu, 'A', $persistance if $persistance;
    }

    push @menu, 'G' if $global;

    push @menu, 'J', $jumble_seed, $n_jumble if $n_jumble;

    push @menu, 'P'      if $model eq 'PMB';                #  P cycles JTT -> PMB -> PAM
    push @menu, 'P', 'P' if $model eq 'PAM';

    if ( @user_trees )
    {
        push @menu, 'U';
        push @menu, 'V' if $rearrange || $global;
        push @menu, 'L' if $user_lengths && ! $rearrange && ! $global;
    }
    elsif ( $slow )                         # Slow and user trees are mutually exclusive
    {
        push @menu, 'S';
    }

    push @menu, 'W' if $weights;

    #  All the options are written, try to launch the run:

    push @menu, 'Y';

    #  Because of the options interface, these values have to be supplied
    #  after the Y:

    if ( $invar_frac || $coef_of_var )
    {
        if ( $invar_frac )
        {
            #  The invariant class counts as one more rate category.

            if ( $coef_of_var ) { $gamma_bins++ if ( $gamma_bins < 9 ) }
            else                { $gamma_bins = 2 }
        }
        push @menu, $coef_of_var, $gamma_bins;
        push @menu, $invar_frac if $invar_frac;
    }

    push @menu, 13 if @user_trees;          # Random number seed of unknown use

    my $menu = join( '', map { "$_\n" } @menu );

    #---------------------------------------------------------------------------
    #  Locate or create the working directory.  It is kept only if the caller
    #  named it and it already existed.
    #---------------------------------------------------------------------------

    my $save_tmp = 0;
    if ( $tmp_dir )
    {
        if ( -d $tmp_dir ) { $save_tmp = 1 }
        else               { mkdir $tmp_dir }
    }
    else
    {
        $tmp = ( $tmp && -d $tmp ) ? $tmp
             : ( -d '/tmp' )       ? '/tmp'
             :                       '.';
        my $int = int( 1000000000 * rand() );
        $tmp_dir = "$tmp/proml.$$.$int";
        mkdir $tmp_dir;
    }

    if ( ! -d $tmp_dir )
    {
        print STDERR "proml::proml: Could not create temporary directory $tmp_dir\n";
        return ();
    }

    #---------------------------------------------------------------------------
    #  Write the files and run the program.  The work is wrapped in a closure
    #  that returns an error description (or '' on success) so that restoring
    #  the working directory and cleaning up happen in exactly one place.
    #---------------------------------------------------------------------------

    my @trees;
    my @likelihoods;

    my $run = sub
    {
        unlink 'outfile' if -f 'outfile';   # proml prompts if these exist
        unlink 'outtree' if -f 'outtree';

        write_infile( @align ) or return 'Could not write infile';

        if ( $categories )
        {
            write_categories( $categories->[1] ) or return 'Could not write categories';
        }
        if ( @user_trees )
        {
            write_intree( @user_trees ) or return 'Could not write intree';
        }
        if ( $weights )
        {
            write_weights( $weights ) or return 'Could not write weights';
        }

        open( my $params, '>', 'params' ) or return "Could not open command file for $program";
        print {$params} $menu;
        close( $params )                  or return "Could not write command file for $program";

        system "$program < params > /dev/null";

        @likelihoods = read_outfile();
        @trees       = gjonewicklib::read_newick_trees( 'outtree' );
        @trees or return 'Could not read proml outtree file';

        return '';
    };

    my $cwd = $ENV{ cwd } || `pwd`;
    chomp $cwd;

    my $error = chdir( $tmp_dir ) ? $run->()
                                  : "Could not change to directory $tmp_dir";

    #  We are done, go back to the original directory and clean up:

    chdir $cwd;
    system( '/bin/rm', '-r', $tmp_dir ) if ! $save_tmp;

    if ( $error )
    {
        print STDERR "proml::proml: $error\n";
        return ();
    }

    #  Returned trees have our local labels; restore the caller's ids:

    @trees = map { gjonewicklib::newick_relabel_nodes( $_, \%id ) } @trees;

    if ( $tree_format eq 'overbeek' )
    {
        @trees = map { gjonewicklib::gjonewick_to_overbeek( $_ ) } @trees;
    }

    return map { [ $_, shift @likelihoods ] } @trees;
}
447 :    
448 :    
449 :     #-------------------------------------------------------------------------------
450 :     # A perl interface for using proml to estimate site-specific rates of change
451 :     #
452 : golsen 1.6 # ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, %proml_opts )
453 :     # ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, \%proml_opts )
454 : overbeek 1.1 #
455 : golsen 1.5 # $categories = [ [ $rate1, ... ], $site_categories ];
456 : overbeek 1.1 #
457 :     # $alignment = [ [ id, def, seq ], ... ]
458 :     # or
459 :     # [ [ id, seq ], ... ]
460 :     #
461 :     # $tree = overbeek tree or gjonewick tree
462 :     #
463 :     # proml_opts is list of key value pairs, or reference to a hash
464 :     #-------------------------------------------------------------------------------
465 :    
#  Estimate a rate category and a weight for every alignment column by
#  evaluating the column's likelihood on rescaled copies of the supplied tree.
#
#  Arguments:
#     $align       reference to a list of [ id, seq ] or [ id, def, seq ]
#     $tree        overbeek or gjonewick tree (not modified)
#     @proml_opts  key => value pairs, or one reference to a hash, passed on
#                  to proml() after the options this routine controls are
#                  set or removed
#
#  Returns ( [ \@rates, $site_categories ], $site_weights ), where @rates has
#  9 values, and $site_categories and $site_weights are strings with one
#  digit per column.  Columns with fewer than 4 informative residues get
#  weight 0.  On failure a message is printed to STDERR and the empty list
#  is returned.

sub estimate_protein_site_rates
{
    my ( $align, $tree, @proml_opts ) = @_;

    if ( ( ref( $align ) ne 'ARRAY' ) || ! @$align )
    {
        print STDERR "proml::estimate_protein_site_rates() called without alignment\n";
        return ();
    }
    if ( ref( $tree ) ne 'ARRAY' )
    {
        print STDERR "proml::estimate_protein_site_rates() called without tree\n";
        return ();
    }

    my $n_cat = 9;      # Number of rate categories returned

    #  Work on a cleaned copy of the alignment, with short local ids:

    my %local_id;
    my $local_id = 'seq0000000';
    my @align;
    foreach my $entry ( @$align )
    {
        my $seq = $entry->[-1];
        $seq =~ s/[BJOUZ]/X/gi;                 # Bad letters go to X
        $seq =~ s/[^A-Z]/-/gi;                  # Anything else becomes -
        $local_id{ $entry->[0] } = ++$local_id; # String increment: seq0000001, ...
        push @align, [ $local_id, $seq ];
    }

    #  Make the tree a gjonewick tree (always a private copy), uproot it, and
    #  change to the local ids.

    if ( ref( $tree->[0] ) ne 'ARRAY' )         # overbeek tree
    {
        $tree = gjonewicklib::overbeek_to_gjonewick( $tree );
    }
    else
    {
        $tree = gjonewicklib::copy_newick_tree( $tree );
    }

    $tree = gjonewicklib::uproot_newick( $tree ) if ! gjonewicklib::newick_is_unrooted( $tree );

    gjonewicklib::newick_relabel_nodes( $tree, \%local_id );

    #  The minimum rate will be 1/2 change per total tree branch length.
    #  This needs to be checked for proml.  The intent is that the optimal
    #  rate for a site with one amino acid change is twice this value.

    my $kmin = 1 / ( gjonewicklib::newick_tree_length( $tree ) || 1 );

    #  Generate "rate variation" by rescaling the supplied tree: 17 rates,
    #  each twice the previous one, starting at kmin.

    my $f     = exp( log( 2 ) / 1 );                        # Interval of 2
    my @rates = map { $kmin * $f**$_ } ( 0 .. 16 );

    my @trees;
    foreach my $rate ( @rates )
    {
        my $tr = gjonewicklib::copy_newick_tree( $tree );
        gjonewicklib::newick_rescale_branches( $tr, $rate );    # Rescales in place
        push @trees, $tr;
    }

    #  Adjust (a copy of) the proml opts.  Rate-model and search options are
    #  removed; the rescaled trees are evaluated with their own lengths.

    my %proml_opts = ( ref( $proml_opts[0] ) eq 'HASH' ) ? %{ $proml_opts[0] } : @proml_opts;

    $proml_opts{ user_lengths } = 1;
    $proml_opts{ user_trees }   = \@trees;
    $proml_opts{ tree_format }  = 'gjo';

    delete @proml_opts{ qw( alpha categories coef_of_var gamma_bins
                            invar_frac jumble_seed n_jumble rearrange ) };

    #  Work through the sites, finding their optimal rates/categories:

    my @categories;
    my @weights;
    my $n_sites = length( $align[0]->[-1] );
    for ( my $i = 0; $i < $n_sites; $i++ )
    {
        my $inform = 0;
        my @column;
        foreach my $entry ( @align )
        {
            my $c = substr( $entry->[-1], $i, 1 );
            $inform++ if ( $c =~ m/[ACDEFGHIKLMNPQRSTVWY]/i );
            push @column, [ $entry->[0], $c ];
        }

        #  Only analyze the rate if there are 4 or more informative sequences:

        if ( $inform < 4 )
        {
            push @categories, $n_cat;
            push @weights,    0;
            next;
        }

        my @results = proml::proml( \@column, \%proml_opts );
        if ( @results < @rates )
        {
            print STDERR "proml::estimate_protein_site_rates: proml failed for site ", $i+1, "\n";
            return ();
        }

        #  Pick the tree (rate) with the highest likelihood; the first one
        #  wins a tie.

        my ( $best_cat, $best_lnl );
        foreach my $cat ( 1 .. scalar( @rates ) )
        {
            my $lnl = $results[ $cat - 1 ]->[1];
            if ( ! defined( $best_cat ) || ( $lnl > $best_lnl ) )
            {
                ( $best_cat, $best_lnl ) = ( $cat, $lnl );
            }
        }

        push @categories, $best_cat;
        push @weights,    1;
    }

    #  Shift the categories so the lowest one in use becomes 1, but never so
    #  far that the window of returned rates runs off the end of @rates.

    my ( $mincat ) = sort { $a <=> $b } @categories;
    $mincat = 1 if ! defined( $mincat );

    my $adjust     = $mincat - 1;
    my $max_adjust = @rates - $n_cat;
    $adjust = $max_adjust if $adjust > $max_adjust;

    @categories = map { min( $_ - $adjust, $n_cat ) } @categories;
    my @used_rates = @rates[ $adjust .. ( $adjust + $n_cat - 1 ) ];

    #  Return category and weight data:

    return ( [ \@used_rates, join( '', @categories ) ], join( '', @weights ) );
}
582 :    
583 :    
#  Return the numerically smaller of two values (the second if they are equal).

sub min
{
    my ( $x, $y ) = @_;
    return $x < $y ? $x : $y;
}
585 :    
586 :    
587 :     #-------------------------------------------------------------------------------
588 :     # Auxiliary functions:
589 :     #-------------------------------------------------------------------------------
590 :    
#  Write the alignment to the file 'infile' in the current directory: a
#  header line with the number of sequences and the length of the first
#  sequence, then one "id sequence" line per entry (id padded to 10 columns).
#
#     $ok = write_infile( @align )     #  @align = ( [ id, seq ], ... )
#
#  Returns true if the file was opened, written and closed without error.

sub write_infile
{
    my @entries = @_;

    open( my $fh, '>', 'infile' ) or return 0;

    my $n_sites = @entries ? length( $entries[0]->[1] ) : 0;

    my $ok = 1;
    print {$fh} scalar( @entries ), ' ', $n_sites, "\n" or $ok = 0;
    foreach my $entry ( @entries )
    {
        printf {$fh} "%-10s %s\n", $entry->[0], $entry->[1] or $ok = 0;
    }

    #  Buffered write errors can surface at close, so check it too.

    return ( close( $fh ) && $ok ) ? 1 : 0;
}
598 :    
599 :    
#  Write user trees to the file 'intree' in the current directory: a line
#  with the number of trees, then one newick string per tree.
#
#     $ok = write_intree( @trees )     #  gjonewick trees
#
#  Returns true if the file was opened, written and closed without error.

sub write_intree
{
    my @trees = @_;

    open( my $fh, '>', 'intree' ) or return 0;

    my $ok = 1;
    print {$fh} scalar( @trees ), "\n" or $ok = 0;
    foreach my $tree ( @trees )
    {
        print {$fh} gjonewicklib::strNewickTree( $tree ), "\n" or $ok = 0;
    }

    #  Buffered write errors can surface at close, so check it too.

    return ( close( $fh ) && $ok ) ? 1 : 0;
}
607 :    
608 :    
#  Write the per-site rate category string, followed by a newline, to the
#  file 'categories' in the current directory.
#
#     $ok = write_categories( $site_categories )
#
#  Returns true if the file was opened, written and closed without error.

sub write_categories
{
    my $categories = shift;

    open( my $fh, '>', 'categories' ) or return 0;

    my $ok = 1;
    print {$fh} "$categories\n" or $ok = 0;

    #  Buffered write errors can surface at close, so check it too.

    return ( close( $fh ) && $ok ) ? 1 : 0;
}
616 :    
617 :    
#  Write the per-site weight string, followed by a newline, to the file
#  'weights' in the current directory.
#
#     $ok = write_weights( $site_weights )
#
#  Returns true if the file was opened, written and closed without error.

sub write_weights
{
    my $weights = shift;

    open( my $fh, '>', 'weights' ) or return 0;

    my $ok = 1;
    print {$fh} "$weights\n" or $ok = 0;

    #  Buffered write errors can surface at close, so check it too.

    return ( close( $fh ) && $ok ) ? 1 : 0;
}
625 :    
626 :    
#  Read the log likelihoods from the file 'outfile' in the current directory.
#  Every line that starts with "Ln Likelihood" contributes its last
#  blank-delimited field.
#
#     @likelihoods = read_outfile()
#
#  Returns the values in file order, or the empty list if the file cannot be
#  opened.

sub read_outfile
{
    open( my $fh, '<', 'outfile' ) or return ();

    my @likelihoods;
    while ( my $line = <$fh> )
    {
        next unless $line =~ /^Ln Likelihood/;

        #  Remove the line ending and any trailing blanks first; otherwise a
        #  trailing space or CR would empty or pollute the extracted value.

        $line =~ s/\s+\z//;
        $line =~ s/.* //;
        push @likelihoods, $line;
    }
    close( $fh );

    return @likelihoods;
}
636 :    
637 :    
638 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3