[Bio] / FigKernelPackages / proml.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/proml.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (view) (download) (as text)

1 : overbeek 1.1 package proml;
2 :    
3 :     #===============================================================================
4 :     # A perl interface to the proml program in the PHYLIP program package
5 :     #
6 :     # @tree_likelihood_pairs = proml( \@alignment, \%options )
7 :     # @tree_likelihood_pairs = proml( \@alignment, %options )
8 :     # @tree_likelihood_pairs = proml( \%options ) # alignment must be included as option
9 :     # @tree_likelihood_pairs = proml( %options ) # alignment must be included as option
10 :     #
11 :     # @alignment = array of id_seq pairs, or id_definition_seq triples
12 :     #
13 :     #-------------------------------------------------------------------------------
14 :     # A perl interface for using proml to estimate site-specific rates of change
15 :     #
16 :     # ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )
17 :     #
18 : golsen 1.5 # $categories = [ [ $rate1, ... ], $site_categories ];
19 : overbeek 1.1 #
20 :     #===============================================================================
21 :     #
22 :     # A perl interface to the proml program in the PHYLIP program package
23 :     #
24 :     # @tree_likelihood_pairs = proml( \@alignment, \%options )
25 :     # @tree_likelihood_pairs = proml( \@alignment, %options )
26 :     # @tree_likelihood_pairs = proml( \%options ) # alignment must be included as option
27 :     # @tree_likelihood_pairs = proml( %options ) # alignment must be included as option
28 :     #
29 :     # @alignment = array of id_seq pairs, or id_definition_seq triples
30 :     #
31 :     # options:
32 :     #
33 :     # For proml:
34 :     # alignment => \@alignment the way to supply the alignment as an option, rather than first param
35 :     # alpha => float alpha parameter of gamma distribution (0.5 - inf)
36 : golsen 1.5 # categories => [ [ rate1, ... ], site_categories ]
37 : overbeek 1.1 # coef_of_var => float 1/sqrt(alpha) for gamma distribution (D = 0)
38 :     # gamma_bins => int number of rate categories used to approximate gamma (D=5)
39 :     # global => bool global rearrangements
40 :     # invar_frac => 0 - 1 fraction of site that are invariant
41 :     # jumble_seed => odd int jumble random seed
42 :     # model => model evolution model JTT (D) | PMB | PAM
43 :     # n_jumble => int number of jumbles
44 :     # persistance => float persistance length of rate category
45 : golsen 1.5 # rate_hmm => [ [ rate, prior_prob ] ... ] # not implimented
46 : overbeek 1.1 # rearrange => [ trees ] rearrange user trees
47 :     # slow => bool more accurate but slower search (D = 0)
48 :     # user_lengths => bool use supplied branch lengths
49 :     # user_trees => [ trees ] user trees
50 :     # weights => site_weights
51 :     #
52 :     # Other:
53 :     # keep_duplicates => bool do not remove duplicate sequences (D = false) [NOT IMPLIMENTED]
54 :     # program => program allows fully defined path
55 : golsen 1.10 # tmp => directory directory for tmp_dir (D = SeedAware::location_of_tmp())
56 :     # tmp_dir => directory directory for temporary files (D = SeedAware::temporary_directory())
57 : overbeek 1.1 # tree_format => overbeek | gjo | fig format of output tree
58 :     #
59 :     # tmp_dir is created and deleted unless its name is supplied, and it already
60 :     # exists.
61 :     #
62 :     #
63 :     # Options that do not require other data:
64 :     # G (global search toggle)
65 :     # L (user lengths toggle)
66 : golsen 1.5 # P (JTT / PMB / PAM cycle)
67 : overbeek 1.1 # S (slow and accurate)
68 :     # U (requires intree file)
69 :     # W (requires weights file)
70 :     #
71 :     # Some option data input orders:
72 :     #
73 :     # J
74 :     # Seed
75 :     # N reps
76 :     # Y
77 :     #
78 :     # R
79 :     # Y
80 :     # Coefficient of variation
81 :     # Rate categories
82 :     # Spurious random seed
83 :     #
84 :     # R
85 :     # R
86 :     # Y
87 :     # Coefficient of variation
88 :     # Gamma rate categories + 1
89 :     # Fraction invariant
90 :     # Spurious random seed
91 :     #
92 :     # C (requires categories file)
93 :     # N cat
94 :     # Rate values (n of them)
95 :    
96 : golsen 1.10 use SeedAware;
97 : golsen 1.8 use Data::Dumper;
98 :    
99 : overbeek 1.1 use strict;
100 :     use gjonewicklib qw( gjonewick_to_overbeek
101 :     newick_is_unrooted
102 :     newick_relabel_nodes
103 : golsen 1.5 newick_rescale_branches
104 : overbeek 1.1 newick_tree_length
105 :     overbeek_to_gjonewick
106 :     parse_newick_tree_str
107 :     strNewickTree
108 :     uproot_newick
109 :     );
110 :    
111 :     sub proml
112 :     {
113 :     my $align;
114 :     if ( ref( $_[0] ) eq 'ARRAY' )
115 :     {
116 :     $align = shift @_;
117 :     ( $align && ( ref( $align ) eq 'ARRAY' ) )
118 :     || ( ( print STDERR "proml::proml() called without alignment\n" )
119 :     && ( return () )
120 :     );
121 :     }
122 :    
123 :     my %options;
124 :     if ( $_[0] )
125 :     {
126 :     %options = ( ref( $_[0]) eq 'HASH' ) ? %{ $_[0] } : @_;
127 :     }
128 :    
129 :     #---------------------------------------------------------------------------
130 :     # Work on a copy of the alignment. Id is always first, seq is always last
131 :     #---------------------------------------------------------------------------
132 :    
133 :     $align ||= $options{ alignment } || $options{ align };
134 :    
135 :     my ( $seq, $id );
136 :     my %id;
137 :     my %local_id;
138 :     my $local_id = 'seq0000000';
139 :     my @align = map { $id = $_->[0];
140 :     $local_id++;
141 :     $id{ $local_id } = $id;
142 :     $local_id{ $id } = $local_id;
143 :     $seq = $_->[-1];
144 :     $seq =~ s/[BJOUZ]/X/gi; # Bad letters go to X
145 :     $seq =~ s/[^A-Z]/-/gi; # Anything else becomes -
146 :     [ $local_id, $seq ]
147 :     } @$align;
148 :    
149 :     #---------------------------------------------------------------------------
150 :     # Process proml options:
151 :     #---------------------------------------------------------------------------
152 :    
153 : golsen 1.5 # [ [ cat_rate1, ... ], site_categories ]
154 :     # Original format expected first field to be number of categories (which
155 :     # is redundant). Handling that form is what the shift if all about.
156 :    
157 :     my $categories = $options{ categories }; # [ [ cat_rate1, ... ], site_categories ]
158 : overbeek 1.1 if ( $categories )
159 :     {
160 :     if ( ref( $categories ) ne 'ARRAY'
161 : golsen 1.5 || ! ( ( @$categories == 2 ) || ( ( @$categories == 3 ) && ( shift @$categories ) ) )
162 :     || ref( $categories->[0] ) ne 'ARRAY'
163 : overbeek 1.1 )
164 :     {
165 : golsen 1.9 print STDERR "proml::proml() categories option value must be [ [ cat_rate1, ... ], site_categories ]\n";
166 : overbeek 1.1 return ();
167 :     }
168 :    
169 :     # Rate values cannot have very many decimal places or proml can't read it:
170 :    
171 : golsen 1.5 @{$categories->[0]} = map { sprintf "%.6f", $_ } @{$categories->[0]};
172 : overbeek 1.1 }
173 :    
174 :     my $coef_of_var = $options{ coef_of_var }
175 :     || ( $options{ alpha } && ( $options{ alpha } > 0) && ( 1 / sqrt( $options{ alpha } ) ) )
176 :     || 0;
177 :     if ( $coef_of_var < 0 )
178 :     {
179 : golsen 1.9 print STDERR "proml::proml() coef_of_var option value must be >= 0\n";
180 : overbeek 1.1 return ();
181 :     }
182 :    
183 :     my $gamma_bins = int( $options{ gamma_bins } || ( $coef_of_var ? 5 : 2 ) );
184 :     if ( ( $gamma_bins < 2 ) || ( $gamma_bins > 9 ) )
185 :     {
186 : golsen 1.9 print STDERR "proml::proml() gamma_bins option value must be > 1 and <= 9\n";
187 : overbeek 1.1 return ();
188 :     }
189 :    
190 :     my $global = $options{ global } || 0;
191 :    
192 :     my $invar_frac = $options{ invar_frac } || 0;
193 :     if ( $invar_frac && ( $invar_frac < 0 || $invar_frac >= 1 ) )
194 :     {
195 : golsen 1.9 print STDERR "proml::proml() invar_frac option value must be >= 0 and < 1\n";
196 : overbeek 1.1 return ();
197 :     }
198 :    
199 :     my $n_jumble = int( $options{ n_jumble } || ( $options{ jumble_seed } ? 1 : 0) );
200 :     if ( $n_jumble < 0 )
201 :     {
202 : golsen 1.9 print STDERR "proml::proml() n_jumble option value must be >= 0\n";
203 : overbeek 1.1 return ();
204 :     }
205 : golsen 1.5
206 : overbeek 1.1 my $jumble_seed = int( $options{ jumble_seed } || 4 * int( 499999999 * rand() ) + 1 );
207 :     if ( ( $jumble_seed <= 0) || ( $jumble_seed % 2 != 1 ) )
208 :     {
209 : golsen 1.9 print STDERR "proml::proml() jumble_seed option value must be an odd number > 0\n";
210 : overbeek 1.1 return ();
211 :     }
212 :    
213 :     my $model = ( $options{ model } =~ m/PAM/i ) ? 'PAM'
214 :     : ( $options{ model } =~ m/Dayhoff/i ) ? 'PAM'
215 :     : ( $options{ model } =~ m/PMB/i ) ? 'PMB'
216 :     : ( $options{ model } =~ m/Henikoff/i ) ? 'PMB'
217 :     : ( $options{ model } =~ m/Tillier/i ) ? 'PMB'
218 :     : ( $options{ model } =~ m/JTT/i ) ? 'JTT'
219 :     : ( $options{ model } =~ m/Jones/i ) ? 'JTT'
220 :     : ( $options{ model } =~ m/Taylor/i ) ? 'JTT'
221 :     : ( $options{ model } =~ m/Thornton/i ) ? 'JTT'
222 :     : 'JTT';
223 :    
224 :     my $persistance = $options{ persistance } || 0;
225 :     if ( $persistance && ( $persistance <= 1 ) )
226 :     {
227 : golsen 1.9 print STDERR "proml::proml() persistance option value must be > 1\n";
228 : overbeek 1.1 return ();
229 :     }
230 : golsen 1.5
231 : overbeek 1.1 my $rearrange = $options{ rearrange };
232 :    
233 :     my $slow = $options{ slow };
234 :    
235 :     my $user_lengths = $options{ user_lengths };
236 :    
237 :     my $user_trees = $options{ user_trees } || $rearrange;
238 :    
239 :     if ( $user_trees )
240 :     {
241 :     if ( ( ref( $user_trees ) ne 'ARRAY' ) || ( ! @$user_trees ) )
242 :     {
243 :     $user_trees = undef; # No trees
244 :     }
245 :     elsif ( ref( $user_trees->[0] ) ne 'ARRAY' ) # First element not tree
246 :     {
247 : golsen 1.9 print STDERR "proml::proml() user_trees or rearrange option value must be reference to list of trees\n";
248 : overbeek 1.1 return ();
249 :     }
250 :     }
251 :    
252 :     my $weights = $options{ weights };
253 : golsen 1.5
254 : overbeek 1.1 #---------------------------------------------------------------------------
255 :     # Options that are not proml options per se:
256 :     #---------------------------------------------------------------------------
257 :    
258 : golsen 1.10 my $program = $options{ program } || SeedAware::executable_for( 'proml', \%options );
259 : overbeek 1.1
260 :     my $tree_format = $options{ tree_format } =~ m/overbeek/i ? 'overbeek'
261 :     : $options{ tree_format } =~ m/gjo/i ? 'gjonewick'
262 :     : $options{ tree_format } =~ m/fig/i ? 'fig'
263 :     : 'overbeek'; # Default
264 :    
265 : golsen 1.10 my ( $tmp_dir, $save_tmp ) = SeedAware::temporary_directory( \%options );
266 : overbeek 1.1
267 :     #---------------------------------------------------------------------------
268 :     # Prepare data:
269 :     #---------------------------------------------------------------------------
270 :     #
271 :     # For simplicity, we will convert overbeek trees to gjo newick trees.
272 :     #
273 :     # gjonewick tree node: [ \@desc, $label, $x, \@c1, \@c2, \@c3, \@c4, \@c5 ]
274 :     #
275 :     # overbeek tree node: [ Label, DistanceToParent,
276 :     # [ ParentPointer, ChildPointer1, ... ],
277 :     # [ Name1\tVal1, Name2\tVal2, ... ]
278 :     # ]
279 :     # Root node of gjonewick always has a descendent list. If the first
280 :     # field of the first tree is not an array reference, they are overbeek
281 : golsen 1.8 # trees.
282 : overbeek 1.1
283 :     my @user_trees = ();
284 : golsen 1.8 if ( @$user_trees )
285 : overbeek 1.1 {
286 : golsen 1.8 if ( ref( @$user_trees[0]->[0] ) ne 'ARRAY' ) # overbeek trees
287 : overbeek 1.1 {
288 : golsen 1.8 @user_trees = map { gjonewicklib::overbeek_to_gjonewick( $_ ) }
289 : overbeek 1.1 @$user_trees;
290 :     }
291 :     else
292 :     {
293 : golsen 1.8 @user_trees = map { gjonewicklib::copy_newick_tree( $_ ) }
294 : overbeek 1.1 @$user_trees;
295 :     }
296 :    
297 : golsen 1.8 # Relabel and make sure trees are unrooted:
298 : overbeek 1.1
299 :     @user_trees = map { gjonewicklib::newick_is_unrooted( $_ ) ? $_
300 :     : gjonewicklib::uproot_newick( $_ )
301 :     }
302 : golsen 1.8 map { gjonewicklib::newick_relabel_nodes( $_, \%local_id ); $_ }
303 : overbeek 1.1 @user_trees;
304 :     }
305 :    
306 :     #---------------------------------------------------------------------------
307 :     # Write the files and run the program:
308 :     #---------------------------------------------------------------------------
309 :    
310 :     my $cwd = $ENV{ cwd } || `pwd`;
311 :     chomp $cwd;
312 :     chdir $tmp_dir;
313 :    
314 :     unlink 'outfile' if -f 'outfile'; # Just checking
315 :     unlink 'outtree' if -f 'outtree'; # ditto
316 :    
317 : golsen 1.6 &write_infile( @align ) or print STDERR "proml::proml: Could not write infile\n"
318 : overbeek 1.1 and chdir $cwd
319 :     and return ();
320 :    
321 : golsen 1.5 open( PROML, ">params" ) or print STDERR "proml::proml: Could not open command file for $program\n"
322 :     and chdir $cwd
323 :     and return ();
324 :    
325 : overbeek 1.1
326 : golsen 1.6 # Start writing options for program:
327 : overbeek 1.1
328 :     if ( $categories )
329 :     {
330 : golsen 1.5 &write_categories( $categories->[1] ) or print STDERR "proml::proml: Could not write categories\n"
331 : overbeek 1.1 and chdir $cwd
332 :     and return ();
333 :     print PROML "C\n",
334 : golsen 1.5 scalar @{$categories->[0]}, "\n",
335 :     join( ' ', @{ $categories->[0] } ), "\n";
336 : overbeek 1.1 }
337 :    
338 :     if ( $invar_frac || $coef_of_var )
339 :     {
340 :     print PROML "R\n";
341 :     print PROML "R\n" if $invar_frac;
342 :     print PROML "A\n", "$persistance\n" if $persistance;
343 : golsen 1.5
344 : overbeek 1.1 }
345 :    
346 :     print PROML "G\n" if $global;
347 :    
348 :     print PROML "J\n", "$jumble_seed\n", "$n_jumble\n" if $n_jumble;
349 :    
350 :     print PROML "P\n" if $model =~ m/PMB/i;
351 :     print PROML "P\nP\n" if $model =~ m/PAM/i;
352 :    
353 :     if ( @user_trees )
354 :     {
355 : golsen 1.6 &write_intree( @user_trees ) or print STDERR "proml::proml: Could not write intree\n"
356 : overbeek 1.1 and chdir $cwd
357 :     and return ();
358 :     print PROML "U\n";
359 :     print PROML "V\n" if $rearrange || $global;
360 :     print PROML "L\n" if $user_lengths && ! $rearrange && ! $global;
361 :     }
362 : golsen 1.7 elsif ( $slow ) # Slow and user trees are mutually exclusive
363 :     {
364 :     print PROML "S\n";
365 :     }
366 : overbeek 1.1
367 :     if ( $weights )
368 :     {
369 : golsen 1.5 &write_weights( $weights ) or print STDERR "proml::proml: Could not write weights\n"
370 : overbeek 1.1 and chdir $cwd
371 :     and return ();
372 :     print PROML "W\n";
373 :     }
374 :    
375 : golsen 1.7 # All the options are written, try to launch the run:
376 : overbeek 1.1
377 :     print PROML "Y\n";
378 :    
379 :     # Becuase of the options interface, these values have to be supplied after
380 :     # the Y:
381 :    
382 :     if ( $invar_frac || $coef_of_var )
383 :     {
384 :     if ( $invar_frac )
385 :     {
386 :     if ( $coef_of_var ) { $gamma_bins++ if ( $gamma_bins < 9 ) }
387 :     else { $gamma_bins = 2 }
388 :     }
389 :     print PROML "$coef_of_var\n";
390 :     print PROML "$gamma_bins\n";
391 : golsen 1.7 print PROML "$invar_frac\n" if $invar_frac;
392 : overbeek 1.1 }
393 : golsen 1.7
394 :     if ( $user_trees )
395 : overbeek 1.1 {
396 :     print PROML "13\n"; # Random number seed of unknown use
397 :     }
398 :    
399 :     close PROML;
400 : golsen 1.5
401 : golsen 1.10 my $redirects = { stdin => 'params',
402 :     stdout => '/dev/null'
403 :     };
404 :     SeedAware::system_with_redirect( $program, $redirects )
405 :     and print STDERR "proml::proml: Failed to run '$program'.\n"
406 :     and reurn ();
407 : overbeek 1.1
408 :     my @likelihoods = &read_outfile();
409 :    
410 :     my @trees = gjonewicklib::read_newick_trees( 'outtree' );
411 : golsen 1.8 @trees or print STDERR "proml::proml: Could not read proml outtree file\n"
412 : overbeek 1.1 and chdir $cwd
413 :     and return ();
414 :    
415 :     # We are done, go back to the original directory:
416 :    
417 :     chdir $cwd;
418 :    
419 :     # Returned trees have our labels, and branch lengths that are in % change,
420 :     # not the more usual expected number per position:
421 :    
422 : golsen 1.9 @trees = map { gjonewicklib::newick_relabel_nodes( $_, \%id ) } @trees;
423 : overbeek 1.1
424 :     if ( $tree_format =~ m/overbeek/i )
425 :     {
426 :     @trees = map { gjonewicklib::gjonewick_to_overbeek( $_ ) } @trees;
427 :     }
428 :    
429 : golsen 1.10 system( '/bin/rm', -r => $tmp_dir ) if ! $save_tmp;
430 : overbeek 1.1
431 :     return map { [ $_, shift @likelihoods ] } @trees;
432 :     }
433 :    
434 :    
435 :     #-------------------------------------------------------------------------------
436 :     # A perl interface for using proml to estimate site-specific rates of change
437 :     #
438 : golsen 1.6 # ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, %proml_opts )
439 :     # ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, \%proml_opts )
440 : overbeek 1.1 #
441 : golsen 1.5 # $categories = [ [ $rate1, ... ], $site_categories ];
442 : overbeek 1.1 #
443 :     # $alignment = [ [ id, def, seq ], ... ]
444 :     # or
445 :     # [ [ id, seq ], ... ]
446 :     #
447 :     # $tree = overbeek tree or gjonewick tree
448 :     #
449 :     # proml_opts is list of key value pairs, or reference to a hash
450 :     #-------------------------------------------------------------------------------
451 :    
452 :     sub estimate_protein_site_rates
453 :     {
454 :     my ( $align, $tree, @proml_opts ) = @_;
455 :    
456 : golsen 1.5 my ( $seq, $id );
457 :     my %local_id;
458 :     my $local_id = 'seq0000000';
459 :     my @align = map { $id = $_->[0];
460 :     $local_id{ $id } = ++$local_id;
461 :     $seq = $_->[-1];
462 :     $seq =~ s/[BJOUZ]/X/gi; # Bad letters go to X
463 :     $seq =~ s/[^A-Z]/-/gi; # Anything else becomes -
464 :     [ $local_id, $seq ]
465 :     } @$align;
466 : overbeek 1.1
467 : golsen 1.5 # Make the tree a gjonewick tree, uproot it, and change to the local ids.
468 : overbeek 1.1
469 :     if ( ref( $tree->[0] ) ne 'ARRAY' ) # overbeek tree
470 :     {
471 :     $tree = gjonewicklib::overbeek_to_gjonewick( $tree );
472 :     }
473 : golsen 1.5 else
474 :     {
475 :     $tree = gjonewicklib::copy_newick_tree( $tree );
476 :     }
477 :    
478 :     $tree = gjonewicklib::uproot_newick( $tree ) if ! gjonewicklib::newick_is_unrooted( $tree );
479 :    
480 :     gjonewicklib::newick_relabel_nodes( $tree, \%local_id );
481 : overbeek 1.1
482 :     # The minimum rate will be 1/2 change per total tree branch length.
483 :     # This needs to be checked for proml. The intent is that he optimal
484 :     # rate for a site with one amino acid change is twice this value.
485 :    
486 :     my $kmin = 1 / ( gjonewicklib::newick_tree_length( $tree ) || 1 );
487 :    
488 :     # Generate "rate variation" by rescaling the supplied tree. We could use a
489 :     # finer grain estimator, then categorize the inferred values. This might
490 :     # work slightly better (this is what DNArates currently does).
491 :    
492 :     my $f = exp( log( 2 ) / 1 ); # Interval of 2
493 :     my @rates = map { $kmin * $f**$_ } ( 0 .. 16 ); # kmin .. 65000 * kmin in 17 bins
494 :     my @cat_vals = ( 1 .. 17 );
495 :     my @trees;
496 :     my $rate;
497 :     foreach $rate ( @rates )
498 :     {
499 :     my $tr = gjonewicklib::copy_newick_tree( $tree );
500 :     gjonewicklib::newick_rescale_branches( $tr, $rate ); # Rescales in place
501 :     push @trees, $tr;
502 :     }
503 :    
504 :     # Adjust (a copy of) the proml opts:
505 :    
506 :     my %proml_opts = ( ref( $proml_opts[0] ) eq 'HASH' ) ? %{ $proml_opts[0] } : @proml_opts;
507 : golsen 1.6
508 : overbeek 1.1 $proml_opts{ user_lengths } = 1;
509 :     $proml_opts{ user_trees } = \@trees;
510 :     $proml_opts{ tree_format } = 'gjo';
511 :    
512 : golsen 1.6 delete $proml_opts{ alpha } if exists $proml_opts{ alpha };
513 :     delete $proml_opts{ categories } if exists $proml_opts{ categories };
514 :     delete $proml_opts{ coef_of_var } if exists $proml_opts{ coef_of_var };
515 :     delete $proml_opts{ gamma_bins } if exists $proml_opts{ gamma_bins };
516 :     delete $proml_opts{ invar_frac } if exists $proml_opts{ invar_frac };
517 :     delete $proml_opts{ jumble_seed } if exists $proml_opts{ jumble_seed };
518 :     delete $proml_opts{ n_jumble } if exists $proml_opts{ n_jumble };
519 :     delete $proml_opts{ rearrange } if exists $proml_opts{ rearrange };
520 :    
521 :     # Work throught the sites, finding their optimal rates/categories:
522 : overbeek 1.1
523 :     my @categories;
524 :     my @weights;
525 :     my $imax = length( $align[0]->[-1] );
526 :     for ( my $i = 0; $i < $imax; $i++ )
527 :     {
528 :     my $inform = 0;
529 :     my @align2 = map { my $c = substr( $_->[-1], $i, 1 );
530 : golsen 1.5 $inform++ if ( $c =~ m/[ACDEFGHIKLMNPQRSTVWY]/i );
531 : overbeek 1.1 [ $_->[0], $c ]
532 :     }
533 :     @align;
534 :    
535 :     # Only analyze the rate if there are 4 or more informative sequences:
536 :    
537 :     if ( $inform >= 4 )
538 :     {
539 :     my @results = proml::proml( \@align2, \%proml_opts );
540 :    
541 :     my ( $best ) = sort { $b->[1] <=> $a->[1] }
542 :     map { [ $_, @{ shift @results }[1] ] } # get the likelihoods
543 :     @cat_vals;
544 :    
545 : overbeek 1.3 # printf STDERR "%6d %2d => %12.4f\n", $i+1, @$best; ## DEBUG ##
546 : overbeek 1.1 push @categories, $best->[0];
547 :     push @weights, 1;
548 :     }
549 :     else
550 :     {
551 :     push @categories, 9;
552 :     push @weights, 0;
553 :     }
554 :     }
555 :    
556 :     # Find the minimum category value to appear:
557 :    
558 :     my ( $mincat ) = sort { $a <=> $b } @categories;
559 :     my $adjust = $mincat - 1;
560 :    
561 :     @categories = map { min( $_ - $adjust, 9 ) } @categories;
562 :     @rates = @rates[ $adjust .. ( $adjust+8 ) ];
563 :    
564 :     # Return category and weight data:
565 :    
566 : golsen 1.5 ( [ \@rates, join( '', @categories ) ], join( '', @weights ) )
567 : overbeek 1.1 }
568 :    
569 :    
570 :     #-------------------------------------------------------------------------------
571 :     # Auxiliary functions:
572 :     #-------------------------------------------------------------------------------
573 :    
574 : golsen 1.9 sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
575 :    
576 :    
577 : overbeek 1.1 sub write_infile
578 :     {
579 :     open( INFILE, '>infile' ) or return 0;
580 :     print INFILE scalar @_, ' ', length( $_[0]->[1] ), "\n";
581 :     foreach ( @_ ) { printf INFILE "%-10s %s\n", @$_ }
582 :     close( INFILE );
583 :     }
584 :    
585 :    
586 :     sub write_intree
587 :     {
588 :     open( INTREE, '>intree' ) or return 0;
589 :     print INTREE scalar @_, "\n";
590 :     foreach ( @_ ) { print INTREE gjonewicklib::strNewickTree( $_ ), "\n" }
591 :     close( INTREE );
592 :     }
593 :    
594 :    
595 :     sub write_categories
596 :     {
597 :     my $categories = shift;
598 :     open( CATEGORIES, '>categories' ) or return 0;
599 :     print CATEGORIES "$categories\n";
600 :     close( CATEGORIES );
601 :     }
602 :    
603 :    
604 :     sub write_weights
605 :     {
606 :     my $weights = shift;
607 :     open( WEIGHTS, '>weights' ) or return 0;
608 :     print WEIGHTS "$weights\n";
609 :     close( WEIGHTS );
610 :     }
611 :    
612 :    
613 :     sub read_outfile
614 :     {
615 :     open( OUTFILE, '<outfile' ) or return ();
616 : golsen 1.5 my @likelihoods = map { chomp; s/.* //; $_ }
617 :     grep { /^Ln Likelihood/ }
618 :     <OUTFILE>;
619 : overbeek 1.1 close( OUTFILE );
620 :     return @likelihoods;
621 :     }
622 :    
623 :    
624 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3