[Bio] / FigKernelPackages / proml.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/proml.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (view) (download) (as text)

1 : overbeek 1.1 package proml;
2 :    
3 :     #===============================================================================
4 :     # A perl interface to the proml program in the PHYLIP program package
5 :     #
6 :     # @tree_likelihood_pairs = proml( \@alignment, \%options )
7 :     # @tree_likelihood_pairs = proml( \@alignment, %options )
8 :     # @tree_likelihood_pairs = proml( \%options ) # alignment must be included as option
9 :     # @tree_likelihood_pairs = proml( %options ) # alignment must be included as option
10 :     #
11 :     # @alignment = array of id_seq pairs, or id_definition_seq triples
12 :     #
13 :     #-------------------------------------------------------------------------------
14 :     # A perl interface for using proml to estimate site-specific rates of change
15 :     #
16 :     # ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )
17 :     #
18 : golsen 1.5 # $categories = [ [ $rate1, ... ], $site_categories ];
19 : overbeek 1.1 #
20 :     #===============================================================================
21 :     #
22 :     # A perl interface to the proml program in the PHYLIP program package
23 :     #
24 :     # @tree_likelihood_pairs = proml( \@alignment, \%options )
25 :     # @tree_likelihood_pairs = proml( \@alignment, %options )
26 :     # @tree_likelihood_pairs = proml( \%options ) # alignment must be included as option
27 :     # @tree_likelihood_pairs = proml( %options ) # alignment must be included as option
28 :     #
29 :     # @alignment = array of id_seq pairs, or id_definition_seq triples
30 :     #
31 :     # options:
32 :     #
33 :     # For proml:
34 :     # alignment => \@alignment the way to supply the alignment as an option, rather than first param
35 :     # alpha => float alpha parameter of gamma distribution (0.5 - inf)
36 : golsen 1.5 # categories => [ [ rate1, ... ], site_categories ]
37 : overbeek 1.1 # coef_of_var => float 1/sqrt(alpha) for gamma distribution (D = 0)
38 :     # gamma_bins => int number of rate categories used to approximate gamma (D=5)
39 :     # global => bool global rearrangements
40 :     # invar_frac => 0 - 1 fraction of site that are invariant
41 :     # jumble_seed => odd int jumble random seed
42 :     # model => model evolution model JTT (D) | PMB | PAM
43 :     # n_jumble => int number of jumbles
44 :     # persistance => float persistance length of rate category
45 : golsen 1.5 # rate_hmm => [ [ rate, prior_prob ] ... ] # not implemented
46 : overbeek 1.1 # rearrange => [ trees ] rearrange user trees
47 :     # slow => bool more accurate but slower search (D = 0)
48 :     # user_lengths => bool use supplied branch lengths
49 :     # user_trees => [ trees ] user trees
50 :     # weights => site_weights
51 :     #
52 :     # Other:
53 :     # keep_duplicates => bool do not remove duplicate sequences (D = false) [NOT IMPLEMENTED]
54 :     # program => program allows fully defined path
55 :     # tmp => directory directory for tmp_dir (D = /tmp or .)
56 :     # tmp_dir => directory directory for temporary files (D = $tmp/proml.$$)
57 :     # tree_format => overbeek | gjo | fig format of output tree
58 :     #
59 :     # tmp_dir is created and deleted unless its name is supplied, and it already
60 :     # exists.
61 :     #
62 :     #
63 :     # Options that do not require other data:
64 :     # G (global search toggle)
65 :     # L (user lengths toggle)
66 : golsen 1.5 # P (JTT / PMB / PAM cycle)
67 : overbeek 1.1 # S (slow and accurate)
68 :     # U (requires intree file)
69 :     # W (requires weights file)
70 :     #
71 :     # Some option data input orders:
72 :     #
73 :     # J
74 :     # Seed
75 :     # N reps
76 :     # Y
77 :     #
78 :     # R
79 :     # Y
80 :     # Coefficient of variation
81 :     # Rate categories
82 :     # Spurious random seed
83 :     #
84 :     # R
85 :     # R
86 :     # Y
87 :     # Coefficient of variation
88 :     # Gamma rate categories + 1
89 :     # Fraction invariant
90 :     # Spurious random seed
91 :     #
92 :     # C (requires categories file)
93 :     # N cat
94 :     # Rate values (n of them)
95 :    
96 :    
97 : golsen 1.8 use Data::Dumper;
98 :    
99 : overbeek 1.1 use strict;
100 :     use gjonewicklib qw( gjonewick_to_overbeek
101 :     newick_is_unrooted
102 :     newick_relabel_nodes
103 : golsen 1.5 newick_rescale_branches
104 : overbeek 1.1 newick_tree_length
105 :     overbeek_to_gjonewick
106 :     parse_newick_tree_str
107 :     strNewickTree
108 :     uproot_newick
109 :     );
110 :    
111 :    
#-------------------------------------------------------------------------------
#  Run the PHYLIP proml program on a protein alignment.
#
#     @tree_likelihood_pairs = proml( \@alignment, \%options )
#     @tree_likelihood_pairs = proml( \@alignment,  %options )
#     @tree_likelihood_pairs = proml( \%options )   # alignment given as option
#     @tree_likelihood_pairs = proml(  %options )   # alignment given as option
#
#  @alignment is a list of [ id, seq ] pairs or [ id, definition, seq ] triples;
#  the id is always taken from the first field and the sequence from the last.
#
#  Each returned element is [ $tree, $likelihood ].  An empty list is returned
#  (after a message on STDERR) when an option is invalid or the run fails.
#
#  The caller's alignment, categories and trees are never modified; the
#  function works on copies.  The temporary directory is removed on every exit
#  path unless the caller named a tmp_dir that already existed.
#-------------------------------------------------------------------------------
sub proml
{
    my $align;
    $align = shift @_ if ref( $_[0] ) eq 'ARRAY';

    my %options;
    if ( $_[0] )
    {
        %options = ( ref( $_[0] ) eq 'HASH' ) ? %{ $_[0] } : @_;
    }

    #---------------------------------------------------------------------------
    #  Work on a copy of the alignment.  Id is always first, seq is always last
    #---------------------------------------------------------------------------

    $align ||= $options{ alignment } || $options{ align };

    #  This test must follow the option merge, because the alignment may be
    #  supplied either as the first parameter or as an option.

    if ( ! $align || ( ref( $align ) ne 'ARRAY' ) || ! @$align )
    {
        print STDERR "proml::proml() called without alignment\n";
        return ();
    }

    my %id;                          #  local id => original id
    my %local_id;                    #  original id => local id
    my $local_id = 'seq0000000';     #  Incremented as a string: seq0000001, ...
    my @align;
    foreach my $entry ( @$align )
    {
        my $id = $entry->[0];
        $local_id++;
        $id{ $local_id } = $id;
        $local_id{ $id } = $local_id;
        my $seq = $entry->[-1];
        $seq =~ s/[BJOUZ]/X/gi;      #  Bad letters go to X
        $seq =~ s/[^A-Z]/-/gi;       #  Anything else becomes -
        push @align, [ $local_id, $seq ];
    }

    #---------------------------------------------------------------------------
    #  Process proml options:
    #---------------------------------------------------------------------------

    #  [ [ cat_rate1, ... ], site_categories ]
    #  Original format expected first field to be number of categories (which
    #  is redundant).  That leading count is dropped from a private copy, so
    #  the caller's list is left untouched.

    my $categories = $options{ categories };
    if ( $categories )
    {
        my @cat = ( ref( $categories ) eq 'ARRAY' ) ? @$categories : ();
        shift @cat if @cat == 3;
        if ( ( @cat != 2 ) || ( ref( $cat[0] ) ne 'ARRAY' ) )
        {
            print STDERR "proml::proml categories option value must be [ [ cat_rate1, ... ], site_categories ]\n";
            return ();
        }

        #  Rate values cannot have very many decimal places or proml can't
        #  read it:

        $categories = [ [ map { sprintf "%.6f", $_ } @{ $cat[0] } ], $cat[1] ];
    }

    my $coef_of_var = $options{ coef_of_var }
                   || ( $options{ alpha } && ( $options{ alpha } > 0 ) && ( 1 / sqrt( $options{ alpha } ) ) )
                   || 0;
    if ( $coef_of_var < 0 )
    {
        print STDERR "proml::proml coef_of_var option value must be >= 0\n";
        return ();
    }

    my $gamma_bins = int( $options{ gamma_bins } || ( $coef_of_var ? 5 : 2 ) );
    if ( ( $gamma_bins < 2 ) || ( $gamma_bins > 9 ) )
    {
        print STDERR "proml::proml gamma_bins option value must be > 1 and <= 9\n";
        return ();
    }

    my $global = $options{ global } || 0;

    my $invar_frac = $options{ invar_frac } || 0;
    if ( $invar_frac && ( $invar_frac < 0 || $invar_frac >= 1 ) )
    {
        print STDERR "proml::proml invar_frac option value must be >= 0 and < 1\n";
        return ();
    }

    my $n_jumble = int( $options{ n_jumble } || ( $options{ jumble_seed } ? 1 : 0 ) );
    if ( $n_jumble < 0 )
    {
        print STDERR "proml::proml n_jumble option value must be >= 0\n";
        return ();
    }

    my $jumble_seed = int( $options{ jumble_seed } || 4 * int( 499999999 * rand() ) + 1 );
    if ( ( $jumble_seed <= 0 ) || ( $jumble_seed % 2 != 1 ) )
    {
        print STDERR "proml::proml jumble_seed option value must be an odd number > 0\n";
        return ();
    }

    #  Model names and author names are accepted; anything else means JTT.

    my $model_opt = defined $options{ model } ? $options{ model } : '';
    my $model = ( $model_opt =~ m/PAM|Dayhoff/i )          ? 'PAM'
              : ( $model_opt =~ m/PMB|Henikoff|Tillier/i ) ? 'PMB'
              :                                              'JTT';

    my $persistance = $options{ persistance } || 0;
    if ( $persistance && ( $persistance <= 1 ) )
    {
        print STDERR "proml::proml persistance option value must be > 1\n";
        return ();
    }

    my $rearrange    = $options{ rearrange };
    my $slow         = $options{ slow };
    my $user_lengths = $options{ user_lengths };
    my $user_trees   = $options{ user_trees } || $rearrange;

    if ( $user_trees )
    {
        if ( ( ref( $user_trees ) ne 'ARRAY' ) || ( ! @$user_trees ) )
        {
            $user_trees = undef;                      # No trees
        }
        elsif ( ref( $user_trees->[0] ) ne 'ARRAY' )  # First element not tree
        {
            print STDERR "proml::proml user_trees or rearrange option value must be reference to list of trees\n";
            return ();
        }
    }

    my $weights = $options{ weights };

    #---------------------------------------------------------------------------
    #  Options that are not proml options per se:
    #---------------------------------------------------------------------------

    my $program = $options{ program } || 'proml';
    my $tmp     = $options{ tmp };
    my $tmp_dir = $options{ tmp_dir };

    my $format_opt  = defined $options{ tree_format } ? $options{ tree_format } : '';
    my $tree_format = $format_opt =~ m/overbeek/i ? 'overbeek'
                    : $format_opt =~ m/gjo/i      ? 'gjonewick'
                    : $format_opt =~ m/fig/i      ? 'fig'
                    :                               'overbeek';   # Default

    #  A directory that the caller named and that already exists is kept;
    #  any directory created here is removed before returning.

    my $save_tmp = 0;
    if ( $tmp_dir )
    {
        if ( -d $tmp_dir ) { $save_tmp = 1 }
        else               { mkdir $tmp_dir }
    }
    else
    {
        $tmp = $tmp && -d $tmp ? $tmp
             : -d '/tmp'       ? '/tmp'
             :                   '.';
        my $int = int( 1000000000 * rand );
        $tmp_dir = "$tmp/proml.$$.$int";
        mkdir $tmp_dir;
    }

    if ( ! -d $tmp_dir )
    {
        print STDERR "proml::proml: Could not create temporary directory $tmp_dir\n";
        return ();
    }

    #---------------------------------------------------------------------------
    #  Prepare data:
    #---------------------------------------------------------------------------
    #
    #  For simplicity, we will convert overbeek trees to gjo newick trees.
    #
    #     gjonewick tree node:  [ \@desc, $label, $x, \@c1, \@c2, \@c3, \@c4, \@c5 ]
    #
    #     overbeek tree node:   [ Label, DistanceToParent,
    #                             [ ParentPointer, ChildPointer1, ... ],
    #                             [ Name1\tVal1, Name2\tVal2, ... ]
    #                           ]
    #
    #  Root node of gjonewick always has a descendent list.  If the first
    #  field of the first tree is not an array reference, they are overbeek
    #  trees.

    my @user_trees = ();
    if ( $user_trees && @$user_trees )   #  $user_trees is undef when no trees were given
    {
        if ( ref( $user_trees->[0]->[0] ) ne 'ARRAY' )   # overbeek trees
        {
            @user_trees = map { gjonewicklib::overbeek_to_gjonewick( $_ ) }
                          @$user_trees;
        }
        else
        {
            @user_trees = map { gjonewicklib::copy_newick_tree( $_ ) }
                          @$user_trees;
        }

        #  Relabel and make sure trees are unrooted:

        @user_trees = map { gjonewicklib::newick_is_unrooted( $_ ) ? $_
                                                                   : gjonewicklib::uproot_newick( $_ )
                          }
                      map { gjonewicklib::newick_relabel_nodes( $_, \%local_id ); $_ }
                      @user_trees;
    }

    #---------------------------------------------------------------------------
    #  Write the files and run the program:
    #---------------------------------------------------------------------------

    my $cwd = $ENV{ cwd } || `pwd`;
    chomp $cwd;

    #  Return to the starting directory and discard the scratch directory
    #  (list-form system, so the path is never interpreted by a shell).

    my $cleanup = sub
    {
        chdir $cwd;
        system( '/bin/rm', '-r', $tmp_dir ) if ! $save_tmp;
    };

    #  Report a failure, clean up, and produce the empty result.

    my $fail = sub
    {
        print STDERR "proml::proml: $_[0]\n";
        $cleanup->();
        return ();
    };

    chdir $tmp_dir
        or return $fail->( "Could not change to temporary directory $tmp_dir" );

    unlink 'outfile' if -f 'outfile';  # Just checking
    unlink 'outtree' if -f 'outtree';  # ditto

    write_infile( @align )
        or return $fail->( "Could not write infile" );

    open( my $params, '>', 'params' )
        or return $fail->( "Could not open command file for $program" );

    #  Start writing options for program:

    if ( $categories )
    {
        write_categories( $categories->[1] )
            or return $fail->( "Could not write categories" );
        print {$params} "C\n",
                        scalar( @{ $categories->[0] } ), "\n",
                        join( ' ', @{ $categories->[0] } ), "\n";
    }

    if ( $invar_frac || $coef_of_var )
    {
        print {$params} "R\n";
        print {$params} "R\n" if $invar_frac;
        print {$params} "A\n", "$persistance\n" if $persistance;
    }

    print {$params} "G\n" if $global;

    print {$params} "J\n", "$jumble_seed\n", "$n_jumble\n" if $n_jumble;

    #  P cycles JTT -> PMB -> PAM, so one or two presses select the model.

    print {$params} "P\n"    if $model =~ m/PMB/i;
    print {$params} "P\nP\n" if $model =~ m/PAM/i;

    if ( @user_trees )
    {
        write_intree( @user_trees )
            or return $fail->( "Could not write intree" );
        print {$params} "U\n";
        print {$params} "V\n" if $rearrange || $global;
        print {$params} "L\n" if $user_lengths && ! $rearrange && ! $global;
    }
    elsif ( $slow )  # Slow and user trees are mutually exclusive
    {
        print {$params} "S\n";
    }

    if ( $weights )
    {
        write_weights( $weights )
            or return $fail->( "Could not write weights" );
        print {$params} "W\n";
    }

    #  All the options are written, try to launch the run:

    print {$params} "Y\n";

    #  Because of the options interface, these values have to be supplied after
    #  the Y:

    if ( $invar_frac || $coef_of_var )
    {
        if ( $invar_frac )
        {
            if ( $coef_of_var ) { $gamma_bins++ if ( $gamma_bins < 9 ) }
            else                { $gamma_bins = 2 }
        }
        print {$params} "$coef_of_var\n";
        print {$params} "$gamma_bins\n";
        print {$params} "$invar_frac\n" if $invar_frac;
    }

    if ( $user_trees )
    {
        print {$params} "13\n";     # Random number seed of unknown use
    }

    close( $params );

    #  The shell is needed here for the redirections; $program comes from the
    #  caller's "program" option and is interpolated as given.

    system "$program < params > /dev/null";

    my @likelihoods = read_outfile();

    my @trees = gjonewicklib::read_newick_trees( 'outtree' );
    @trees or return $fail->( "Could not read proml outtree file" );

    #  We are done, go back to the original directory and drop the scratch
    #  files (everything needed is in memory now):

    $cleanup->();

    #  Returned trees have our labels, and branch lengths that are in % change,
    #  not the more usual expected number per position:

    @trees = map { gjonewicklib::newick_relabel_nodes( $_, \%id ) } @trees;

    if ( $tree_format =~ m/overbeek/i )
    {
        @trees = map { gjonewicklib::gjonewick_to_overbeek( $_ ) } @trees;
    }

    return map { [ $_, shift @likelihoods ] } @trees;
}
449 :    
450 :    
#-------------------------------------------------------------------------------
#  A perl interface for using proml to estimate site-specific rates of change
#
#     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree,  %proml_opts )
#     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, \%proml_opts )
#
#     $categories = [ [ $rate1, ... ], $site_categories ];
#
#     $alignment = [ [ id, def, seq ], ... ]
#           or
#                  [ [ id, seq ], ... ]
#
#     $tree = overbeek tree or gjonewick tree
#
#     proml_opts is list of key value pairs, or reference to a hash
#
#  $site_categories is a string with one digit (1-9) per alignment column and
#  $weights is a string with one 0/1 flag per column (0 for columns with fewer
#  than 4 informative residues).  Nine rates are returned.  An empty list is
#  returned, after a message on STDERR, if proml does not yield a likelihood
#  for every rescaled tree.
#-------------------------------------------------------------------------------

sub estimate_protein_site_rates
{
    my ( $align, $tree, @proml_opts ) = @_;

    #  Copy the alignment with local ids and cleaned-up sequences:

    my %local_id;
    my $local_id = 'seq0000000';
    my @align;
    foreach my $entry ( @$align )
    {
        my $id = $entry->[0];
        $local_id{ $id } = ++$local_id;
        my $seq = $entry->[-1];
        $seq =~ s/[BJOUZ]/X/gi;  # Bad letters go to X
        $seq =~ s/[^A-Z]/-/gi;   # Anything else becomes -
        push @align, [ $local_id, $seq ];
    }

    #  Make the tree a gjonewick tree, uproot it, and change to the local ids.

    if ( ref( $tree->[0] ) ne 'ARRAY' )   # overbeek tree
    {
        $tree = gjonewicklib::overbeek_to_gjonewick( $tree );
    }
    else
    {
        $tree = gjonewicklib::copy_newick_tree( $tree );
    }

    $tree = gjonewicklib::uproot_newick( $tree ) if ! gjonewicklib::newick_is_unrooted( $tree );

    gjonewicklib::newick_relabel_nodes( $tree, \%local_id );

    #  The minimum rate will be 1/2 change per total tree branch length.
    #  This needs to be checked for proml.  The intent is that the optimal
    #  rate for a site with one amino acid change is twice this value.

    my $kmin = 1 / ( gjonewicklib::newick_tree_length( $tree ) || 1 );

    #  Generate "rate variation" by rescaling the supplied tree.  We could use a
    #  finer grain estimator, then categorize the inferred values.  This might
    #  work slightly better (this is what DNArates currently does).

    my $f        = exp( log( 2 ) / 1 );                 # Interval of 2
    my @rates    = map { $kmin * $f**$_ } ( 0 .. 16 );  # kmin .. 65000 * kmin in 17 bins
    my @cat_vals = ( 1 .. 17 );
    my $n_out    = 9;                                   # Categories returned to caller

    my @trees;
    foreach my $rate ( @rates )
    {
        my $tr = gjonewicklib::copy_newick_tree( $tree );
        gjonewicklib::newick_rescale_branches( $tr, $rate ); # Rescales in place
        push @trees, $tr;
    }

    #  Adjust (a copy of) the proml opts:

    my %proml_opts = ( ref( $proml_opts[0] ) eq 'HASH' ) ? %{ $proml_opts[0] } : @proml_opts;

    $proml_opts{ user_lengths } =  1;
    $proml_opts{ user_trees   } = \@trees;
    $proml_opts{ tree_format  } = 'gjo';

    #  These options would interfere with scoring the fixed, rescaled trees:

    delete @proml_opts{ qw( alpha
                            categories
                            coef_of_var
                            gamma_bins
                            invar_frac
                            jumble_seed
                            n_jumble
                            rearrange
                          ) };

    #  Work through the sites, finding their optimal rates/categories:

    my @categories;
    my @weights;
    my $imax = @align ? length( $align[0]->[-1] ) : 0;
    for ( my $i = 0; $i < $imax; $i++ )
    {
        my $inform = 0;
        my @align2 = map { my $c = substr( $_->[-1], $i, 1 );
                           $inform++ if ( $c =~ m/[ACDEFGHIKLMNPQRSTVWY]/i );
                           [ $_->[0], $c ]
                         }
                     @align;

        #  Only analyze the rate if there are 4 or more informative sequences:

        if ( $inform >= 4 )
        {
            my @results = proml::proml( \@align2, \%proml_opts );

            #  One [ tree, likelihood ] pair is needed per rescaled tree;
            #  anything less means the proml run did not succeed.

            if ( @results < @cat_vals )
            {
                print STDERR "proml::estimate_protein_site_rates: proml failed for alignment column ", $i+1, "\n";
                return ();
            }

            my ( $best ) = sort { $b->[1] <=> $a->[1] }
                           map  { [ $cat_vals[$_], $results[$_]->[1] ] }  # get the likelihoods
                           ( 0 .. $#cat_vals );

#           printf STDERR "%6d  %2d => %12.4f\n", $i+1, @$best; ## DEBUG ##
            push @categories, $best->[0];
            push @weights,    1;
        }
        else
        {
            push @categories, 9;
            push @weights,    0;
        }
    }

    #  Find the minimum category value to appear:

    my ( $mincat ) = sort { $a <=> $b } @categories;
    $mincat = 1 if ! defined $mincat;         # No sites at all

    #  Shift the categories down so that the lowest one used becomes 1, but
    #  never so far that the window of returned rates runs off the end of
    #  the rate list.

    my $adjust     = $mincat - 1;
    my $max_adjust = @rates - $n_out;
    $adjust = $max_adjust if $adjust > $max_adjust;

    @categories = map { ( $_ - $adjust < $n_out ) ? $_ - $adjust : $n_out } @categories;
    @rates      = @rates[ $adjust .. ( $adjust + $n_out - 1 ) ];

    #  Return category and weight data:

    ( [ \@rates, join( '', @categories ) ], join( '', @weights ) )
}
584 :    
585 :    
#  Return the smaller of two numbers (the second one if they are equal).
sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
587 :    
588 :    
589 :     #-------------------------------------------------------------------------------
590 :     # Auxiliary functions:
591 :     #-------------------------------------------------------------------------------
592 :    
#  Write the alignment to the file "infile" in the current directory.
#
#     $ok = write_infile( @align )
#
#  Each element of @align is [ id, sequence ].  The first line gives the
#  number of sequences and the length of the first sequence; each following
#  line is the id left-justified in 10 columns, a space, and the sequence.
#  Returns 0 if the file cannot be opened, otherwise the result of close.
sub write_infile
{
    open( my $fh, '>', 'infile' ) or return 0;
    print  {$fh} scalar( @_ ), ' ', length( $_[0]->[1] ), "\n";
    foreach my $entry ( @_ ) { printf {$fh} "%-10s %s\n", @$entry }
    close( $fh );
}
600 :    
601 :    
#  Write user trees to the file "intree" in the current directory.
#
#     $ok = write_intree( @trees )
#
#  The first line is the number of trees; each tree follows on its own line
#  as formatted by gjonewicklib::strNewickTree.  Returns 0 if the file cannot
#  be opened, otherwise the result of close.
sub write_intree
{
    open( my $fh, '>', 'intree' ) or return 0;
    print {$fh} scalar( @_ ), "\n";
    foreach my $tree ( @_ ) { print {$fh} gjonewicklib::strNewickTree( $tree ), "\n" }
    close( $fh );
}
609 :    
610 :    
#  Write the site categories string, followed by a newline, to the file
#  "categories" in the current directory.
#
#     $ok = write_categories( $site_categories )
#
#  Returns 0 if the file cannot be opened, otherwise the result of close.
sub write_categories
{
    my $categories = shift;
    open( my $fh, '>', 'categories' ) or return 0;
    print {$fh} "$categories\n";
    close( $fh );
}
618 :    
619 :    
#  Write the site weights string, followed by a newline, to the file
#  "weights" in the current directory.
#
#     $ok = write_weights( $site_weights )
#
#  Returns 0 if the file cannot be opened, otherwise the result of close.
sub write_weights
{
    my $weights = shift;
    open( my $fh, '>', 'weights' ) or return 0;
    print {$fh} "$weights\n";
    close( $fh );
}
627 :    
628 :    
#  Read the likelihood values from the file "outfile" in the current directory.
#
#     @likelihoods = read_outfile()
#
#  Every line that starts with "Ln Likelihood" contributes one value: the text
#  after the last space on the line.  Trailing whitespace (including a
#  carriage return) is removed first, so that it cannot hide the value.
#  Returns an empty list if the file cannot be opened.
sub read_outfile
{
    open( my $fh, '<', 'outfile' ) or return ();
    my @likelihoods = map  { s/\s+\z//; s/.* //; $_ }
                      grep { /^Ln Likelihood/ }
                      <$fh>;
    close( $fh );
    return @likelihoods;
}
638 :    
639 :    
640 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3