[Bio] / FigKernelPackages / proml.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/proml.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Sat Jan 13 03:22:44 2007 UTC revision 1.8, Sun Feb 11 18:35:46 2007 UTC
# Line 15  Line 15 
15  #  #
16  #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )  #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )
17  #  #
18  #     $categories = [ $n_categories, [ $rates ], $site_categories ];  #     $categories = [ [ $rate1, ... ], $site_categories ];
19  #  #
20  #===============================================================================  #===============================================================================
21  #  #
# Line 33  Line 33 
33  #    For proml:  #    For proml:
34  #      alignment    => \@alignment    the way to supply the alignment as an option, rather than first param  #      alignment    => \@alignment    the way to supply the alignment as an option, rather than first param
35  #      alpha        => float          alpha parameter of gamma distribution (0.5 - inf)  #      alpha        => float          alpha parameter of gamma distribution (0.5 - inf)
36  #      categories   => [ n, [ rates ], site_categories ]  #      categories   => [ [ rate1, ... ], site_categories ]
37  #      coef_of_var  => float          1/sqrt(alpha) for gamma distribution (D = 0)  #      coef_of_var  => float          1/sqrt(alpha) for gamma distribution (D = 0)
38  #      gamma_bins   => int            number of rate categories used to approximate gamma (D=5)  #      gamma_bins   => int            number of rate categories used to approximate gamma (D=5)
39  #      global       => bool           global rearrangements  #      global       => bool           global rearrangements
# Line 42  Line 42 
42  #      model        => model          evolution model JTT (D) | PMB | PAM  #      model        => model          evolution model JTT (D) | PMB | PAM
43  #      n_jumble     => int            number of jumbles  #      n_jumble     => int            number of jumbles
44  #      persistance  => float          persistance length of rate category  #      persistance  => float          persistance length of rate category
45  #      rate_hmm     => [ n_rates, [ rates ], [ probabilies ] ]  #      rate_hmm     => [ [ rate, prior_prob ] ... ]   # not implemented
46  #      rearrange    => [ trees ]      rearrange user trees  #      rearrange    => [ trees ]      rearrange user trees
47  #      slow         => bool           more accurate but slower search (D = 0)  #      slow         => bool           more accurate but slower search (D = 0)
48  #      user_lengths => bool           use supplied branch lengths  #      user_lengths => bool           use supplied branch lengths
# Line 63  Line 63 
63  #  Options that do not require other data:  #  Options that do not require other data:
64  #    G (global search toggle)  #    G (global search toggle)
65  #    L (user lengths toggle)  #    L (user lengths toggle)
66  #    P (JTT / PAM toggle)  #    P (JTT / PMB / PAM cycle)
67  #    S (slow and accurate)  #    S (slow and accurate)
68  #    U (requires intree file)  #    U (requires intree file)
69  #    W (requires weights file)  #    W (requires weights file)
# Line 94  Line 94 
94  #  Rate values (n of them)  #  Rate values (n of them)
95    
96    
97    use Data::Dumper;
98    
99  use strict;  use strict;
100  use gjonewicklib qw( gjonewick_to_overbeek  use gjonewicklib qw( gjonewick_to_overbeek
101                       newick_is_unrooted                       newick_is_unrooted
102                       newick_relabel_nodes                       newick_relabel_nodes
103                         newick_rescale_branches
104                       newick_tree_length                       newick_tree_length
105                       overbeek_to_gjonewick                       overbeek_to_gjonewick
106                       parse_newick_tree_str                       parse_newick_tree_str
# Line 148  Line 151 
151      #  Process proml options:      #  Process proml options:
152      #---------------------------------------------------------------------------      #---------------------------------------------------------------------------
153    
154      my $categories   = $options{ categories };  # [ n_cat, [ cat_rates ], site_cats ]      #  [ [ cat_rate1, ... ], site_categories ]
155        #  Original format expected first field to be number of categories (which
156        #  is redundant).  Handling that form is what the shift is all about.
157    
158        my $categories   = $options{ categories };  # [ [ cat_rate1, ... ], site_categories ]
159      if ( $categories )      if ( $categories )
160      {      {
161          if ( ref( $categories ) ne 'ARRAY'          if ( ref( $categories ) ne 'ARRAY'
162            || @$categories != 3            || ! ( ( @$categories == 2 ) || ( ( @$categories == 3 ) && ( shift @$categories ) ) )
163            || $categories->[0] < 1 || $categories->[0] > 9            || ref( $categories->[0] ) ne 'ARRAY'
           || ref( $categories->[1] ) ne 'ARRAY'  
           || @{$categories->[1]} != $categories->[0]  
164             )             )
165          {          {
166              print STDERR "proml::proml categories option value must be [ n_cat, [ cat_rates ], site_cats ]\n";              print STDERR "proml::proml categories option value must be [ [ cat_rate1, ... ], site_categories ]\n";
167              return ();              return ();
168          }          }
169    
170          #  Rate values cannot have very many decimal places or proml can't read it:          #  Rate values cannot have very many decimal places or proml can't read it:
171    
172          @{$categories->[1]} = map { sprintf "%.6f", $_ } @{$categories->[1]};          @{$categories->[0]} = map { sprintf "%.6f", $_ } @{$categories->[0]};
173      }      }
174    
175      my $coef_of_var  = $options{ coef_of_var }      my $coef_of_var  = $options{ coef_of_var }
# Line 240  Line 245 
245          }          }
246          elsif ( ref( $user_trees->[0] ) ne 'ARRAY' )  # First element not tree          elsif ( ref( $user_trees->[0] ) ne 'ARRAY' )  # First element not tree
247          {          {
248              print STDERR "proml::proml usertree or rearrange option value must be reference to list of trees\n";              print STDERR "proml::proml user_trees or rearrange option value must be reference to list of trees\n";
249              return ();              return ();
250          }          }
251      }      }
# Line 293  Line 298 
298      #                            ]      #                            ]
299      #  Root node of gjonewick always has a descendent list.  If the first      #  Root node of gjonewick always has a descendent list.  If the first
300      #  field of the first tree is not an array reference, they are overbeek      #  field of the first tree is not an array reference, they are overbeek
301      #  trees.  Also relabel tree tips to local ids.      #  trees.
302    
303      my @user_trees = ();      my @user_trees = ();
304      if ( $user_trees )      if ( @$user_trees )
305      {      {
306          if ( @user_trees && ( ref( $user_trees[0]->[0] ) ne 'ARRAY' ) )  # overbeek trees          if ( ref( @$user_trees[0]->[0] ) ne 'ARRAY' )  # overbeek trees
307          {          {
308              @user_trees = map { gjonewicklib::newick_relabel_nodes( $_, \%local_id ) }              @user_trees = map { gjonewicklib::overbeek_to_gjonewick( $_ ) }
                           map { gjonewicklib::overbeek_to_gjonewick( $_ ) }  
309                            @$user_trees;                            @$user_trees;
310          }          }
311          else          else
312          {          {
313              @user_trees = map { gjonewicklib::newick_relabel_nodes( $_, \%local_id ) }              @user_trees = map { gjonewicklib::copy_newick_tree( $_ ) }
314                            @$user_trees;                            @$user_trees;
315          }          }
316    
317          # Make sure trees are unrooted:          # Relabel and make sure trees are unrooted:
318    
319          @user_trees = map { gjonewicklib::newick_is_unrooted( $_ ) ? $_          @user_trees = map { gjonewicklib::newick_is_unrooted( $_ ) ? $_
320                                                                     : gjonewicklib::uproot_newick( $_ )                                                                     : gjonewicklib::uproot_newick( $_ )
321                            }                            }
322                          map { gjonewicklib::newick_relabel_nodes( $_, \%local_id ); $_ }
323                        @user_trees;                        @user_trees;
324      }      }
325    
# Line 329  Line 334 
334      unlink 'outfile' if -f 'outfile';  # Just checking      unlink 'outfile' if -f 'outfile';  # Just checking
335      unlink 'outtree' if -f 'outtree';  # ditto      unlink 'outtree' if -f 'outtree';  # ditto
336    
337      &write_infile( @align ) or print STDERR "proml::proml: Could write infile\n"      &write_infile( @align ) or print STDERR "proml::proml: Could not write infile\n"
338                                 and chdir $cwd                                 and chdir $cwd
339                                 and return ();                                 and return ();
340    
341      # open( PROML, "| $program > /dev/null" ) or print STDERR "proml::proml: Could not open pipe to $program\n"      open( PROML, ">params" ) or print STDERR "proml::proml: Could not open command file for $program\n"
     open( PROML, ">params" ) or print STDERR "proml::proml: Could not open pipe to $program\n"  
342                                                 and chdir $cwd                                                 and chdir $cwd
343                                                 and return ();                                                 and return ();
344    
345    
346      #  Start sending optoins ot program:      #  Start writing options for program:
347    
348      if ( $categories )      if ( $categories )
349      {      {
350          &write_categories( $categories->[2] ) or print STDERR "proml::proml: Could not write categories\n"          &write_categories( $categories->[1] ) or print STDERR "proml::proml: Could not write categories\n"
351                                                   and chdir $cwd                                                   and chdir $cwd
352                                                   and return ();                                                   and return ();
353          print PROML "C\n",          print PROML "C\n",
354                      "$categories->[0]\n",                      scalar @{$categories->[0]}, "\n",
355                      join( ' ', @{ $categories->[1] } ), "\n";                      join( ' ', @{ $categories->[0] } ), "\n";
356      }      }
357    
358      if ( $invar_frac || $coef_of_var )      if ( $invar_frac || $coef_of_var )
# Line 366  Line 370 
370      print PROML "P\n"    if $model =~ m/PMB/i;      print PROML "P\n"    if $model =~ m/PMB/i;
371      print PROML "P\nP\n" if $model =~ m/PAM/i;      print PROML "P\nP\n" if $model =~ m/PAM/i;
372    
     print PROML "S\n" if $slow;  
   
373      if ( @user_trees )      if ( @user_trees )
374      {      {
375          &write_intree( @user_trees ) or print STDERR "proml::proml: Could write intree\n"          &write_intree( @user_trees ) or print STDERR "proml::proml: Could not write intree\n"
376                                          and chdir $cwd                                          and chdir $cwd
377                                          and return ();                                          and return ();
378          print PROML "U\n";          print PROML "U\n";
379          print PROML "V\n" if $rearrange || $global;          print PROML "V\n" if $rearrange || $global;
380          print PROML "L\n" if $user_lengths && ! $rearrange && ! $global;          print PROML "L\n" if $user_lengths && ! $rearrange && ! $global;
381      }      }
382        elsif ( $slow )  # Slow and user trees are mutually exclusive
383        {
384            print PROML "S\n";
385        }
386    
387      if ( $weights )      if ( $weights )
388      {      {
389          &write_weights( $weights ) or print STDERR "proml::proml: Could write weights\n"          &write_weights( $weights ) or print STDERR "proml::proml: Could not write weights\n"
390                                        and chdir $cwd                                        and chdir $cwd
391                                        and return ();                                        and return ();
392          print PROML "W\n";          print PROML "W\n";
393      }      }
394    
395      #  All the options are sent, try to lauch the run:      #  All the options are written, try to launch the run:
396    
397      print PROML "Y\n";      print PROML "Y\n";
398    
# Line 404  Line 410 
410          print PROML "$gamma_bins\n";          print PROML "$gamma_bins\n";
411          print PROML "$invar_frac\n" if $invar_frac;          print PROML "$invar_frac\n" if $invar_frac;
412      }      }
413      elsif ( $user_trees )  
414        if ( $user_trees )
415      {      {
416          print PROML "13\n";     #  Random number seed of unknown use          print PROML "13\n";     #  Random number seed of unknown use
417      }      }
# Line 416  Line 423 
423      my @likelihoods = &read_outfile();      my @likelihoods = &read_outfile();
424    
425      my @trees = gjonewicklib::read_newick_trees( 'outtree' );      my @trees = gjonewicklib::read_newick_trees( 'outtree' );
426      @trees or print STDERR "proml::proml: Could read proml outtree file\n"      @trees or print STDERR "proml::proml: Could not read proml outtree file\n"
427                and chdir $cwd                and chdir $cwd
428                and return ();                and return ();
429    
# Line 427  Line 434 
434      #  Returned trees have our labels, and branch lengths that are in % change,      #  Returned trees have our labels, and branch lengths that are in % change,
435      #  not the more usual expected number per position:      #  not the more usual expected number per position:
436    
437      my @trees = map { gjonewicklib::newick_rescale_branches( $_, 0.01 );      my @trees = map { gjonewicklib::newick_relabel_nodes( $_, \%id ) }
                       gjonewicklib::newick_relabel_nodes( $_, \%id )  
                     }  
438                  @trees;                  @trees;
439    
440      if ( $tree_format =~ m/overbeek/i )      if ( $tree_format =~ m/overbeek/i )
# Line 446  Line 451 
451  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
452  #  A perl interface for using proml to estimate site-specific rates of change  #  A perl interface for using proml to estimate site-specific rates of change
453  #  #
454  #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )  #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree,  %proml_opts )
455    #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, \%proml_opts )
456  #  #
457  #     $categories = [ $n_categories, [ $rates ], $site_categories ];  #     $categories = [ [ $rate1, ... ], $site_categories ];
458  #  #
459  #  $alignment = [ [ id, def, seq ], ... ]  #  $alignment = [ [ id, def, seq ], ... ]
460  #             or  #             or
# Line 463  Line 469 
469  {  {
470      my ( $align, $tree, @proml_opts ) = @_;      my ( $align, $tree, @proml_opts ) = @_;
471    
472      my @align = @$align;      my ( $seq, $id );
473        my %local_id;
474        my $local_id = 'seq0000000';
475        my @align = map { $id = $_->[0];
476                          $local_id{ $id } = ++$local_id;
477                          $seq = $_->[-1];
478                          $seq =~ s/[BJOUZ]/X/gi;  # Bad letters go to X
479                          $seq =~ s/[^A-Z]/-/gi;   # Anything else becomes -
480                          [ $local_id, $seq ]
481                        } @$align;
482    
483      #  Make the tree into a gjonewick tree so that we can manipulate it:      #  Make the tree a gjonewick tree, uproot it, and change to the local ids.
484    
485      if ( ref( $tree->[0] ) ne 'ARRAY' )   # overbeek tree      if ( ref( $tree->[0] ) ne 'ARRAY' )   # overbeek tree
486      {      {
487          $tree = gjonewicklib::overbeek_to_gjonewick( $tree );          $tree = gjonewicklib::overbeek_to_gjonewick( $tree );
488      }      }
489        else
490        {
491            $tree = gjonewicklib::copy_newick_tree( $tree );
492        }
493    
494        $tree = gjonewicklib::uproot_newick( $tree ) if ! gjonewicklib::newick_is_unrooted( $tree );
495    
496        gjonewicklib::newick_relabel_nodes( $tree, \%local_id );
497    
498      #  The minimum rate will be 1/2 change per total tree branch length.      #  The minimum rate will be 1/2 change per total tree branch length.
499      #  This needs to be checked for proml.  The intent is that the optimal      #  This needs to be checked for proml.  The intent is that the optimal
# Line 478  Line 501 
501    
502      my $kmin = 1 / ( gjonewicklib::newick_tree_length( $tree ) || 1 );      my $kmin = 1 / ( gjonewicklib::newick_tree_length( $tree ) || 1 );
503    
     print STDERR "Length = ", gjonewicklib::newick_tree_length( $tree ), "; kmin = $kmin\n"; ## DEBUG ##  
   
504      #  Generate "rate variation" by rescaling the supplied tree.  We could use a      #  Generate "rate variation" by rescaling the supplied tree.  We could use a
505      #  finer grain estimator, then categorize the inferred values.  This might      #  finer grain estimator, then categorize the inferred values.  This might
506      #  work slightly better (this is what DNArates currently does).      #  work slightly better (this is what DNArates currently does).
# Line 499  Line 520 
520      #  Adjust (a copy of) the proml opts:      #  Adjust (a copy of) the proml opts:
521    
522      my %proml_opts = ( ref( $proml_opts[0] ) eq 'HASH' ) ? %{ $proml_opts[0] } : @proml_opts;      my %proml_opts = ( ref( $proml_opts[0] ) eq 'HASH' ) ? %{ $proml_opts[0] } : @proml_opts;
523      $proml_opts{ alpha        } =  undef;  
     $proml_opts{ categories   } =  0;  
     $proml_opts{ coef_of_var  } =  0;  
     $proml_opts{ gamma_bins   } =  0;  
     $proml_opts{ invar_frac   } =  0;  
     $proml_opts{ jumble_seed  } =  0;  
     $proml_opts{ n_jumble     } =  0;  
     $proml_opts{ rearrange    } =  0;  
524      $proml_opts{ user_lengths } =  1;      $proml_opts{ user_lengths } =  1;
525      $proml_opts{ user_trees   } = \@trees;      $proml_opts{ user_trees   } = \@trees;
526      $proml_opts{ tree_format  } = 'gjo';      $proml_opts{ tree_format  } = 'gjo';
527    
528      #  Work throught the sites, finding their optimal categories:      delete $proml_opts{ alpha       } if exists $proml_opts{ alpha       };
529        delete $proml_opts{ categories  } if exists $proml_opts{ categories  };
530        delete $proml_opts{ coef_of_var } if exists $proml_opts{ coef_of_var };
531        delete $proml_opts{ gamma_bins  } if exists $proml_opts{ gamma_bins  };
532        delete $proml_opts{ invar_frac  } if exists $proml_opts{ invar_frac  };
533        delete $proml_opts{ jumble_seed } if exists $proml_opts{ jumble_seed };
534        delete $proml_opts{ n_jumble    } if exists $proml_opts{ n_jumble    };
535        delete $proml_opts{ rearrange   } if exists $proml_opts{ rearrange   };
536    
537        #  Work through the sites, finding their optimal rates/categories:
538    
539      my @categories;      my @categories;
540      my @weights;      my @weights;
# Line 535  Line 558 
558                             map  { [ $_, @{ shift @results }[1] ] }  # get the likelihoods                             map  { [ $_, @{ shift @results }[1] ] }  # get the likelihoods
559                             @cat_vals;                             @cat_vals;
560    
561              printf STDERR "%6d  %2d => %12.4f\n", $i+1, @$best; ## DEBUG ##  #           printf STDERR "%6d  %2d => %12.4f\n", $i+1, @$best; ## DEBUG ##
562              push @categories, $best->[0];              push @categories, $best->[0];
563              push @weights,    1;              push @weights,    1;
564          }          }
# Line 556  Line 579 
579    
580      #  Return category and weight data:      #  Return category and weight data:
581    
582      ( [ scalar @rates, \@rates, join( '', @categories ) ], join( '', @weights ) )      ( [ \@rates, join( '', @categories ) ], join( '', @weights ) )
583  }  }
584    
585    
# Line 605  Line 628 
628    
629  sub read_outfile  sub read_outfile
630  {  {
     my @likelihoods;  
631      open( OUTFILE, '<outfile' ) or return ();      open( OUTFILE, '<outfile' ) or return ();
632      while ( defined( $_ = <OUTFILE> ) )      my @likelihoods = map  { chomp; s/.* //; $_ }
633      {                        grep { /^Ln Likelihood/ }
634          next if ! m/^Ln /;                        <OUTFILE>;
         chomp;  
         s/.* //;  
         push @likelihoods, $_;  
     }  
635      close( OUTFILE );      close( OUTFILE );
636      return @likelihoods;      return @likelihoods;
637  }  }

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.8

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3