[Bio] / FigKernelPackages / proml.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/proml.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (view) (download) (as text)

1 : overbeek 1.1 package proml;
2 :    
3 :     #===============================================================================
4 :     # A perl interface to the proml program in the PHYLIP program package
5 :     #
6 :     # @tree_likelihood_pairs = proml( \@alignment, \%options )
7 :     # @tree_likelihood_pairs = proml( \@alignment, %options )
8 :     # @tree_likelihood_pairs = proml( \%options ) # alignment must be included as option
9 :     # @tree_likelihood_pairs = proml( %options ) # alignment must be included as option
10 :     #
11 :     # @alignment = array of id_seq pairs, or id_definition_seq triples
12 :     #
13 :     #-------------------------------------------------------------------------------
14 :     # A perl interface for using proml to estimate site-specific rates of change
15 :     #
16 :     # ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )
17 :     #
18 : golsen 1.5 # $categories = [ [ $rate1, ... ], $site_categories ];
19 : overbeek 1.1 #
20 :     #===============================================================================
21 :     #
22 :     # A perl interface to the proml program in the PHYLIP program package
23 :     #
24 :     # @tree_likelihood_pairs = proml( \@alignment, \%options )
25 :     # @tree_likelihood_pairs = proml( \@alignment, %options )
26 :     # @tree_likelihood_pairs = proml( \%options ) # alignment must be included as option
27 :     # @tree_likelihood_pairs = proml( %options ) # alignment must be included as option
28 :     #
29 :     # @alignment = array of id_seq pairs, or id_definition_seq triples
30 :     #
31 :     # options:
32 :     #
33 :     # For proml:
34 :     # alignment => \@alignment the way to supply the alignment as an option, rather than first param
35 :     # alpha => float alpha parameter of gamma distribution (0.5 - inf)
36 : golsen 1.5 # categories => [ [ rate1, ... ], site_categories ]
37 : overbeek 1.1 # coef_of_var => float 1/sqrt(alpha) for gamma distribution (D = 0)
38 :     # gamma_bins => int number of rate categories used to approximate gamma (D=5)
39 :     # global => bool global rearrangements
40 :     # invar_frac => 0 - 1 fraction of sites that are invariant
41 :     # jumble_seed => odd int jumble random seed
42 :     # model => model evolution model JTT (D) | PMB | PAM
43 :     # n_jumble => int number of jumbles
44 :     # persistance => float persistance length of rate category
45 : golsen 1.5 # rate_hmm => [ [ rate, prior_prob ] ... ] # not implemented
46 : overbeek 1.1 # rearrange => [ trees ] rearrange user trees
47 :     # slow => bool more accurate but slower search (D = 0)
48 :     # user_lengths => bool use supplied branch lengths
49 :     # user_trees => [ trees ] user trees
50 :     # weights => site_weights
51 :     #
52 :     # Other:
53 :     # keep_duplicates => bool do not remove duplicate sequences (D = false) [NOT IMPLEMENTED]
54 :     # program => program allows fully defined path
55 :     # tmp => directory directory for tmp_dir (D = /tmp or .)
56 :     # tmp_dir => directory directory for temporary files (D = $tmp/proml.$$)
57 :     # tree_format => overbeek | gjo | fig format of output tree
58 :     #
59 :     # tmp_dir is created and deleted unless its name is supplied, and it already
60 :     # exists.
61 :     #
62 :     #
63 :     # Options that do not require other data:
64 :     # G (global search toggle)
65 :     # L (user lengths toggle)
66 : golsen 1.5 # P (JTT / PMB / PAM cycle)
67 : overbeek 1.1 # S (slow and accurate)
68 :     # U (requires intree file)
69 :     # W (requires weights file)
70 :     #
71 :     # Some option data input orders:
72 :     #
73 :     # J
74 :     # Seed
75 :     # N reps
76 :     # Y
77 :     #
78 :     # R
79 :     # Y
80 :     # Coefficient of variation
81 :     # Rate categories
82 :     # Spurious random seed
83 :     #
84 :     # R
85 :     # R
86 :     # Y
87 :     # Coefficient of variation
88 :     # Gamma rate categories + 1
89 :     # Fraction invariant
90 :     # Spurious random seed
91 :     #
92 :     # C (requires categories file)
93 :     # N cat
94 :     # Rate values (n of them)
95 :    
96 :    
97 :     use strict;
98 :     use gjonewicklib qw( gjonewick_to_overbeek
99 :     newick_is_unrooted
100 :     newick_relabel_nodes
101 : golsen 1.5 newick_rescale_branches
102 : overbeek 1.1 newick_tree_length
103 :     overbeek_to_gjonewick
104 :     parse_newick_tree_str
105 :     strNewickTree
106 :     uproot_newick
107 :     );
108 :    
109 :    
sub proml
{
    #  Run the PHYLIP proml program on a protein alignment.
    #
    #     @tree_likelihood_pairs = proml( \@alignment, \%options )
    #     @tree_likelihood_pairs = proml( \@alignment,  %options )
    #     @tree_likelihood_pairs = proml( \%options )
    #     @tree_likelihood_pairs = proml(  %options )
    #
    #  Each returned element is [ $tree, $ln_likelihood ].  On any error a
    #  message is printed to STDERR and an empty list is returned.  The
    #  caller's alignment and categories data are not modified.

    my $align;
    $align = shift @_ if ref( $_[0] ) eq 'ARRAY';

    #  Remaining arguments are a hash reference or a flat key => value list:

    my %options;
    if ( $_[0] )
    {
        %options = ( ref( $_[0] ) eq 'HASH' ) ? %{ $_[0] } : @_;
    }

    #---------------------------------------------------------------------------
    #  Work on a copy of the alignment.  Id is always first, seq is always last
    #---------------------------------------------------------------------------

    $align ||= $options{ alignment } || $options{ align };
    if ( ! $align || ( ref( $align ) ne 'ARRAY' ) || ! @$align )
    {
        print STDERR "proml::proml() called without alignment\n";
        return ();
    }

    my %id;                        #  local id    => original id
    my %local_id;                  #  original id => local id
    my $local_id = 'seq0000000';   #  string increment gives seq0000001, ...
    my @align;
    foreach my $entry ( @$align )
    {
        my $id  = $entry->[0];
        my $seq = $entry->[-1];
        $local_id++;
        $id{ $local_id } = $id;
        $local_id{ $id } = $local_id;
        $seq =~ s/[BJOUZ]/X/gi;    #  Bad letters go to X
        $seq =~ s/[^A-Z]/-/gi;     #  Anything else becomes -
        push @align, [ $local_id, $seq ];
    }

    #---------------------------------------------------------------------------
    #  Process proml options:
    #---------------------------------------------------------------------------

    #  categories is [ [ cat_rate1, ... ], site_categories ].  The original
    #  format had a (redundant) leading number of categories; that form is
    #  still accepted.  We build a private copy so that the caller's data
    #  are neither shortened nor reformatted.

    my $categories = $options{ categories };
    if ( $categories )
    {
        my @cat = ( ref( $categories ) eq 'ARRAY' ) ? @$categories : ();
        shift @cat if @cat == 3;
        if ( ( @cat != 2 ) || ( ref( $cat[0] ) ne 'ARRAY' ) )
        {
            print STDERR "proml::proml categories option value must be [ [ cat_rate1, ... ], site_categories ]\n";
            return ();
        }

        #  Rate values cannot have very many decimal places or proml can't
        #  read them:

        $categories = [ [ map { sprintf "%.6f", $_ } @{ $cat[0] } ], $cat[1] ];
    }

    my $coef_of_var = $options{ coef_of_var }
                   || ( $options{ alpha } && ( $options{ alpha } > 0 ) && ( 1 / sqrt( $options{ alpha } ) ) )
                   || 0;
    if ( $coef_of_var < 0 )
    {
        print STDERR "proml::proml coef_of_var option value must be >= 0\n";
        return ();
    }

    my $gamma_bins = int( $options{ gamma_bins } || ( $coef_of_var ? 5 : 2 ) );
    if ( ( $gamma_bins < 2 ) || ( $gamma_bins > 9 ) )
    {
        print STDERR "proml::proml gamma_bins option value must be > 1 and <= 9\n";
        return ();
    }

    my $global = $options{ global } || 0;

    my $invar_frac = $options{ invar_frac } || 0;
    if ( $invar_frac && ( $invar_frac < 0 || $invar_frac >= 1 ) )
    {
        print STDERR "proml::proml invar_frac option value must be >= 0 and < 1\n";
        return ();
    }

    my $n_jumble = int( $options{ n_jumble } || ( $options{ jumble_seed } ? 1 : 0 ) );
    if ( $n_jumble < 0 )
    {
        print STDERR "proml::proml n_jumble option value must be >= 0\n";
        return ();
    }

    #  The default seed, 4 * k + 1, is always odd:

    my $jumble_seed = int( $options{ jumble_seed } || 4 * int( 499999999 * rand() ) + 1 );
    if ( ( $jumble_seed <= 0 ) || ( $jumble_seed % 2 != 1 ) )
    {
        print STDERR "proml::proml jumble_seed option value must be an odd number > 0\n";
        return ();
    }

    #  Model names are matched in the same priority as before: PAM synonyms,
    #  then PMB synonyms; everything else (including no value) is JTT.

    my $model_opt = defined $options{ model } ? $options{ model } : '';
    my $model = $model_opt =~ m/PAM|Dayhoff/i          ? 'PAM'
              : $model_opt =~ m/PMB|Henikoff|Tillier/i ? 'PMB'
              :                                          'JTT';

    my $persistance = $options{ persistance } || 0;
    if ( $persistance && ( $persistance <= 1 ) )
    {
        print STDERR "proml::proml persistance option value must be > 1\n";
        return ();
    }

    my $rearrange    = $options{ rearrange };
    my $slow         = $options{ slow };
    my $user_lengths = $options{ user_lengths };
    my $user_trees   = $options{ user_trees } || $rearrange;

    if ( $user_trees )
    {
        if ( ( ref( $user_trees ) ne 'ARRAY' ) || ( ! @$user_trees ) )
        {
            $user_trees = undef;                      #  No trees
        }
        elsif ( ref( $user_trees->[0] ) ne 'ARRAY' )  #  First element not tree
        {
            print STDERR "proml::proml usertree or rearrange option value must be reference to list of trees\n";
            return ();
        }
    }

    my $weights = $options{ weights };

    #---------------------------------------------------------------------------
    #  Options that are not proml options per se:
    #---------------------------------------------------------------------------

    my $program = $options{ program } || 'proml';
    my $tmp     = $options{ tmp };
    my $tmp_dir = $options{ tmp_dir };

    #  NOTE(review): 'fig' is accepted here, but only 'overbeek' triggers a
    #  conversion of the output trees below -- confirm the intended meaning.

    my $format_opt  = defined $options{ tree_format } ? $options{ tree_format } : '';
    my $tree_format = $format_opt =~ m/overbeek/i ? 'overbeek'
                    : $format_opt =~ m/gjo/i      ? 'gjonewick'
                    : $format_opt =~ m/fig/i      ? 'fig'
                    :                               'overbeek';   #  Default

    #  A supplied tmp_dir that already exists is kept; anything that we
    #  create is removed when we are done.

    my $save_tmp = 0;
    if ( $tmp_dir )
    {
        if ( -d $tmp_dir ) { $save_tmp = 1 }
        else               { mkdir $tmp_dir }
    }
    else
    {
        $tmp = $tmp && -d $tmp ? $tmp
             : -d '/tmp'       ? '/tmp'
             :                   '.';
        my $int = int( 1000000000 * rand );
        $tmp_dir = "$tmp/proml.$$.$int";
        mkdir $tmp_dir;
    }

    if ( ! -d $tmp_dir )
    {
        print STDERR "proml::proml: Could not create directory $tmp_dir\n";
        return ();
    }

    #---------------------------------------------------------------------------
    #  Prepare data:
    #---------------------------------------------------------------------------
    #
    #  For simplicity, we will convert overbeek trees to gjo newick trees.
    #
    #  gjonewick tree node:  [ \@desc, $label, $x, \@c1, \@c2, \@c3, \@c4, \@c5 ]
    #
    #  overbeek tree node:   [ Label, DistanceToParent,
    #                          [ ParentPointer, ChildPointer1, ... ],
    #                          [ Name1\tVal1, Name2\tVal2, ... ]
    #                        ]
    #
    #  Root node of gjonewick always has a descendent list.  If the first
    #  field of the first tree is not an array reference, they are overbeek
    #  trees.  Also relabel tree tips to local ids.

    my @user_trees = ();
    if ( $user_trees )
    {
        #  The test must look at the supplied trees ($user_trees), not at
        #  the still-empty output list:

        my @gjo_trees = ( ref( $user_trees->[0]->[0] ) ne 'ARRAY' )
                      ? map { gjonewicklib::overbeek_to_gjonewick( $_ ) } @$user_trees
                      : @$user_trees;

        @user_trees = map { gjonewicklib::newick_relabel_nodes( $_, \%local_id ) }
                      @gjo_trees;

        #  Make sure trees are unrooted:

        @user_trees = map { gjonewicklib::newick_is_unrooted( $_ ) ? $_
                                                                   : gjonewicklib::uproot_newick( $_ )
                          }
                      @user_trees;
    }

    #---------------------------------------------------------------------------
    #  Write the files and run the program:
    #---------------------------------------------------------------------------

    my $cwd = $ENV{ cwd } || `pwd`;
    chomp $cwd;

    #  Common failure exit:  report, return to the starting directory, remove
    #  the temporary directory if it is ours, and return the empty list.

    my $fail = sub
    {
        print STDERR $_[0];
        chdir $cwd;
        system( '/bin/rm', '-r', $tmp_dir ) if ! $save_tmp;
        return ();
    };

    chdir $tmp_dir
        or return $fail->( "proml::proml: Could not change to directory $tmp_dir\n" );

    unlink 'outfile' if -f 'outfile';  #  Just checking
    unlink 'outtree' if -f 'outtree';  #  ditto

    write_infile( @align )
        or return $fail->( "proml::proml: Could not write infile\n" );

    open( my $params_fh, '>', 'params' )
        or return $fail->( "proml::proml: Could not open command file for $program\n" );

    #  Start writing options for program:

    if ( $categories )
    {
        if ( ! write_categories( $categories->[1] ) )
        {
            close( $params_fh );
            return $fail->( "proml::proml: Could not write categories\n" );
        }
        print {$params_fh} "C\n",
                           scalar @{ $categories->[0] }, "\n",
                           join( ' ', @{ $categories->[0] } ), "\n";
    }

    if ( $invar_frac || $coef_of_var )
    {
        print {$params_fh} "R\n";
        print {$params_fh} "R\n" if $invar_frac;
        print {$params_fh} "A\n", "$persistance\n" if $persistance;
    }

    print {$params_fh} "G\n" if $global;

    print {$params_fh} "J\n", "$jumble_seed\n", "$n_jumble\n" if $n_jumble;

    #  P cycles JTT -> PMB -> PAM:

    print {$params_fh} "P\n"    if $model eq 'PMB';
    print {$params_fh} "P\nP\n" if $model eq 'PAM';

    print {$params_fh} "S\n" if $slow;

    if ( @user_trees )
    {
        if ( ! write_intree( @user_trees ) )
        {
            close( $params_fh );
            return $fail->( "proml::proml: Could not write intree\n" );
        }
        print {$params_fh} "U\n";
        print {$params_fh} "V\n" if $rearrange || $global;
        print {$params_fh} "L\n" if $user_lengths && ! $rearrange && ! $global;
    }

    if ( $weights )
    {
        if ( ! write_weights( $weights ) )
        {
            close( $params_fh );
            return $fail->( "proml::proml: Could not write weights\n" );
        }
        print {$params_fh} "W\n";
    }

    #  All the options are written, try to launch the run:

    print {$params_fh} "Y\n";

    #  Because of the options interface, these values have to be supplied
    #  after the Y:

    if ( $invar_frac || $coef_of_var )
    {
        if ( $invar_frac )
        {
            if ( $coef_of_var ) { $gamma_bins++ if ( $gamma_bins < 9 ) }
            else                { $gamma_bins = 2 }
        }
        print {$params_fh} "$coef_of_var\n";
        print {$params_fh} "$gamma_bins\n";
        print {$params_fh} "$invar_frac\n" if $invar_frac;
    }
    elsif ( @user_trees )
    {
        print {$params_fh} "13\n";     #  Random number seed of unknown use
    }

    close( $params_fh )
        or return $fail->( "proml::proml: Could not write command file for $program\n" );

    system "$program < params > /dev/null";

    my @likelihoods = read_outfile();

    my @trees = gjonewicklib::read_newick_trees( 'outtree' );
    @trees
        or return $fail->( "proml::proml: Could not read proml outtree file\n" );

    #  We are done, go back to the original directory:

    chdir $cwd;

    #  Returned trees have our labels, and branch lengths that are in % change,
    #  not the more usual expected number per position:

    @trees = map { gjonewicklib::newick_relabel_nodes( $_, \%id ) } @trees;

    if ( $tree_format =~ m/overbeek/i )
    {
        @trees = map { gjonewicklib::gjonewick_to_overbeek( $_ ) } @trees;
    }

    system( '/bin/rm', '-r', $tmp_dir ) if ! $save_tmp;

    return map { [ $_, shift @likelihoods ] } @trees;
}
444 :    
445 :    
446 :     #-------------------------------------------------------------------------------
447 :     # A perl interface for using proml to estimate site-specific rates of change
448 :     #
449 :     # ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )
450 :     #
451 : golsen 1.5 # $categories = [ [ $rate1, ... ], $site_categories ];
452 : overbeek 1.1 #
453 :     # $alignment = [ [ id, def, seq ], ... ]
454 :     # or
455 :     # [ [ id, seq ], ... ]
456 :     #
457 :     # $tree = overbeek tree or gjonewick tree
458 :     #
459 :     # proml_opts is list of key value pairs, or reference to a hash
460 :     #-------------------------------------------------------------------------------
461 :    
sub estimate_protein_site_rates
{
    #  Estimate a rate category for each alignment column by asking proml
    #  for the likelihood of the column on rescaled copies of the tree.
    #
    #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )
    #
    #  $categories = [ [ $rate1, ... ], $site_categories ], with one category
    #  digit (1-9) per column.  $weights is a string with one digit per
    #  column:  1 for an analyzed column, 0 for a column with too few
    #  informative residues.  If proml fails for a column, a message is
    #  printed to STDERR and the empty list is returned.

    my ( $align, $tree, @proml_opts ) = @_;

    my $max_cats = 9;     #  Number of categories in the returned data

    #  Work on a copy of the alignment with local ids and cleaned sequences:

    my %local_id;
    my $local_id = 'seq0000000';
    my @align;
    foreach my $entry ( @$align )
    {
        my $id  = $entry->[0];
        my $seq = $entry->[-1];
        $local_id{ $id } = ++$local_id;
        $seq =~ s/[BJOUZ]/X/gi;    #  Bad letters go to X
        $seq =~ s/[^A-Z]/-/gi;     #  Anything else becomes -
        push @align, [ $local_id, $seq ];
    }

    #  Make the tree a gjonewick tree, uproot it, and change to the local ids.

    if ( ref( $tree->[0] ) ne 'ARRAY' )   # overbeek tree
    {
        $tree = gjonewicklib::overbeek_to_gjonewick( $tree );
    }
    else
    {
        $tree = gjonewicklib::copy_newick_tree( $tree );
    }

    $tree = gjonewicklib::uproot_newick( $tree ) if ! gjonewicklib::newick_is_unrooted( $tree );

    gjonewicklib::newick_relabel_nodes( $tree, \%local_id );

    #  The minimum rate will be 1/2 change per total tree branch length.
    #  This needs to be checked for proml.  The intent is that the optimal
    #  rate for a site with one amino acid change is twice this value.

    my $kmin = 1 / ( gjonewicklib::newick_tree_length( $tree ) || 1 );

    #  Generate "rate variation" by rescaling the supplied tree.  We could use a
    #  finer grain estimator, then categorize the inferred values.  This might
    #  work slightly better (this is what DNArates currently does).

    my $f        = exp( log( 2 ) / 1 );                   # Interval of 2
    my @rates    = map { $kmin * $f**$_ } ( 0 .. 16 );    # kmin .. 65000 * kmin in 17 bins
    my @cat_vals = ( 1 .. 17 );
    my @trees;
    foreach my $rate ( @rates )
    {
        my $tr = gjonewicklib::copy_newick_tree( $tree );
        gjonewicklib::newick_rescale_branches( $tr, $rate ); # Rescales in place
        push @trees, $tr;
    }

    #  Adjust (a copy of) the proml opts:

    my %proml_opts = ( ref( $proml_opts[0] ) eq 'HASH' ) ? %{ $proml_opts[0] } : @proml_opts;
    $proml_opts{ alpha }        = undef;
    $proml_opts{ categories }   = 0;
    $proml_opts{ coef_of_var }  = 0;
    $proml_opts{ gamma_bins }   = 0;
    $proml_opts{ invar_frac }   = 0;
    $proml_opts{ jumble_seed }  = 0;
    $proml_opts{ n_jumble }     = 0;
    $proml_opts{ rearrange }    = 0;
    $proml_opts{ user_lengths } = 1;
    $proml_opts{ user_trees }   = \@trees;
    $proml_opts{ tree_format }  = 'gjo';

    #  Work through the sites, finding their optimal categories:

    my @categories;
    my @weights;
    my $imax = length( $align[0]->[-1] );
    for ( my $i = 0; $i < $imax; $i++ )
    {
        my $inform = 0;
        my @align2 = map { my $c = substr( $_->[-1], $i, 1 );
                           $inform++ if ( $c =~ m/[ACDEFGHIKLMNPQRSTVWY]/i );
                           [ $_->[0], $c ]
                         }
                     @align;

        #  Only analyze the rate if there are 4 or more informative sequences:

        if ( $inform >= 4 )
        {
            my @results = proml::proml( \@align2, \%proml_opts );

            #  We need one [ tree, likelihood ] pair per rescaled tree.  A
            #  short list means that the run failed; report it instead of
            #  dying on an undefined array reference.

            if ( @results < @cat_vals )
            {
                print STDERR "proml::estimate_protein_site_rates: proml failed for alignment column ", $i+1, "\n";
                return ();
            }

            my ( $best ) = sort { $b->[1] <=> $a->[1] }
                           map  { [ $_, @{ shift @results }[1] ] }  # get the likelihoods
                           @cat_vals;

            push @categories, $best->[0];
            push @weights,    1;
        }
        else
        {
            push @categories, 9;
            push @weights,    0;
        }
    }

    #  Find the minimum category value to appear, and slide the window of
    #  reported rates so that it starts there.  The offset is confined to
    #  0 .. ( number of rates - window size ), so the window never runs off
    #  either end of @rates.

    my ( $mincat ) = sort { $a <=> $b } @categories;
    my $adjust     = ( $mincat || 1 ) - 1;
    my $max_adjust = @rates - $max_cats;
    $adjust = $max_adjust if $adjust > $max_adjust;

    @categories = map { min( $_ - $adjust, $max_cats ) } @categories;
    @rates      = @rates[ $adjust .. ( $adjust + $max_cats - 1 ) ];

    #  Return category and weight data:

    ( [ \@rates, join( '', @categories ) ], join( '', @weights ) )
}
578 :    
579 :    
#  Return the smaller of two numbers (the second one if they are equal).
#  Uses the scalar elements $_[0] and $_[1], not one-element array slices.
sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
581 :    
582 :    
583 :     #-------------------------------------------------------------------------------
584 :     # Auxiliary functions:
585 :     #-------------------------------------------------------------------------------
586 :    
#  Write the alignment to 'infile' in the current directory.
#
#     $ok = write_infile( [ $id, $seq ], ... )
#
#  The first line holds the number of sequences and the length of the first
#  sequence; each following line is the id, left justified in 10 columns,
#  a space, and the sequence.  Returns true only if the file was opened,
#  written and closed without error.
sub write_infile
{
    open( my $fh, '>', 'infile' ) or return 0;

    my $n_col = @_ ? length( $_[0]->[1] ) : 0;
    my $ok    = print {$fh} scalar @_, ' ', $n_col, "\n";
    foreach my $entry ( @_ )
    {
        printf {$fh} "%-10s %s\n", @$entry or $ok = 0;
    }

    #  Always close, then combine with the status of the writes:

    return ( close( $fh ) && $ok ) ? 1 : 0;
}
594 :    
595 :    
#  Write user trees to 'intree' in the current directory.
#
#     $ok = write_intree( @gjonewick_trees )
#
#  The first line is the number of trees; each tree follows on its own line,
#  as formatted by gjonewicklib::strNewickTree.  Returns true only if the
#  file was opened, written and closed without error.
sub write_intree
{
    open( my $fh, '>', 'intree' ) or return 0;

    my $ok = print {$fh} scalar @_, "\n";
    foreach my $tree ( @_ )
    {
        print {$fh} gjonewicklib::strNewickTree( $tree ), "\n" or $ok = 0;
    }

    #  Always close, then combine with the status of the writes:

    return ( close( $fh ) && $ok ) ? 1 : 0;
}
603 :    
604 :    
#  Write the per-site category string to 'categories' in the current
#  directory, followed by a newline.
#
#     $ok = write_categories( $site_categories )
#
#  Returns true only if the file was opened, written and closed without error.
sub write_categories
{
    my $categories = shift;
    open( my $fh, '>', 'categories' ) or return 0;
    my $ok = print {$fh} "$categories\n";

    #  Always close, then combine with the status of the write:

    return ( close( $fh ) && $ok ) ? 1 : 0;
}
612 :    
613 :    
#  Write the per-site weight string to 'weights' in the current directory,
#  followed by a newline.
#
#     $ok = write_weights( $site_weights )
#
#  Returns true only if the file was opened, written and closed without error.
sub write_weights
{
    my $weights = shift;
    open( my $fh, '>', 'weights' ) or return 0;
    my $ok = print {$fh} "$weights\n";

    #  Always close, then combine with the status of the write:

    return ( close( $fh ) && $ok ) ? 1 : 0;
}
621 :    
622 :    
#  Collect the likelihood values reported in 'outfile' in the current
#  directory.
#
#     @likelihoods = read_outfile()
#
#  Every line that starts with "Ln Likelihood" contributes one value:  the
#  text after the last space on the line.  Values are returned in file
#  order.  If the file cannot be opened, the empty list is returned.
sub read_outfile
{
    open( my $fh, '<', 'outfile' ) or return ();

    my @likelihoods;
    while ( my $line = <$fh> )
    {
        next if $line !~ /^Ln Likelihood/;
        chomp $line;
        $line =~ s/.* //;       #  Keep only what follows the last space
        push @likelihoods, $line;
    }

    close( $fh );
    return @likelihoods;
}
632 :    
633 :    
634 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3