[Bio] / FigKernelPackages / proml.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/proml.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Sat Jan 13 03:22:44 2007 UTC revision 1.6, Wed Jan 31 01:06:51 2007 UTC
# Line 15  Line 15 
15  #  #
16  #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )  #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )
17  #  #
18  #     $categories = [ $n_categories, [ $rates ], $site_categories ];  #     $categories = [ [ $rate1, ... ], $site_categories ];
19  #  #
20  #===============================================================================  #===============================================================================
21  #  #
# Line 33  Line 33 
33  #    For proml:  #    For proml:
34  #      alignment    => \@alignment    the way to supply the alignment as an option, rather than first param  #      alignment    => \@alignment    the way to supply the alignment as an option, rather than first param
35  #      alpha        => float          alpha parameter of gamma distribution (0.5 - inf)  #      alpha        => float          alpha parameter of gamma distribution (0.5 - inf)
36  #      categories   => [ n, [ rates ], site_categories ]  #      categories   => [ [ rate1, ... ], site_categories ]
37  #      coef_of_var  => float          1/sqrt(alpha) for gamma distribution (D = 0)  #      coef_of_var  => float          1/sqrt(alpha) for gamma distribution (D = 0)
38  #      gamma_bins   => int            number of rate categories used to approximate gamma (D=5)  #      gamma_bins   => int            number of rate categories used to approximate gamma (D=5)
39  #      global       => bool           global rearrangements  #      global       => bool           global rearrangements
# Line 42  Line 42 
42  #      model        => model          evolution model JTT (D) | PMB | PAM  #      model        => model          evolution model JTT (D) | PMB | PAM
43  #      n_jumble     => int            number of jumbles  #      n_jumble     => int            number of jumbles
44  #      persistance  => float          persistance length of rate category  #      persistance  => float          persistance length of rate category
45  #      rate_hmm     => [ n_rates, [ rates ], [ probabilies ] ]  #      rate_hmm     => [ [ rate, prior_prob ] ... ]   # not implimented
46  #      rearrange    => [ trees ]      rearrange user trees  #      rearrange    => [ trees ]      rearrange user trees
47  #      slow         => bool           more accurate but slower search (D = 0)  #      slow         => bool           more accurate but slower search (D = 0)
48  #      user_lengths => bool           use supplied branch lengths  #      user_lengths => bool           use supplied branch lengths
# Line 63  Line 63 
63  #  Options that do not require other data:  #  Options that do not require other data:
64  #    G (global search toggle)  #    G (global search toggle)
65  #    L (user lengths toggle)  #    L (user lengths toggle)
66  #    P (JTT / PAM toggle)  #    P (JTT / PMB / PAM cycle)
67  #    S (slow and accurate)  #    S (slow and accurate)
68  #    U (requires intree file)  #    U (requires intree file)
69  #    W (requires weights file)  #    W (requires weights file)
# Line 98  Line 98 
98  use gjonewicklib qw( gjonewick_to_overbeek  use gjonewicklib qw( gjonewick_to_overbeek
99                       newick_is_unrooted                       newick_is_unrooted
100                       newick_relabel_nodes                       newick_relabel_nodes
101                         newick_rescale_branches
102                       newick_tree_length                       newick_tree_length
103                       overbeek_to_gjonewick                       overbeek_to_gjonewick
104                       parse_newick_tree_str                       parse_newick_tree_str
# Line 148  Line 149 
149      #  Process proml options:      #  Process proml options:
150      #---------------------------------------------------------------------------      #---------------------------------------------------------------------------
151    
152      my $categories   = $options{ categories };  # [ n_cat, [ cat_rates ], site_cats ]      #  [ [ cat_rate1, ... ], site_categories ]
153        #  Original format expected first field to be number of categories (which
154        #  is redundant).  Handling that form is what the shift if all about.
155    
156        my $categories   = $options{ categories };  # [ [ cat_rate1, ... ], site_categories ]
157      if ( $categories )      if ( $categories )
158      {      {
159          if ( ref( $categories ) ne 'ARRAY'          if ( ref( $categories ) ne 'ARRAY'
160            || @$categories != 3            || ! ( ( @$categories == 2 ) || ( ( @$categories == 3 ) && ( shift @$categories ) ) )
161            || $categories->[0] < 1 || $categories->[0] > 9            || ref( $categories->[0] ) ne 'ARRAY'
           || ref( $categories->[1] ) ne 'ARRAY'  
           || @{$categories->[1]} != $categories->[0]  
162             )             )
163          {          {
164              print STDERR "proml::proml categories option value must be [ n_cat, [ cat_rates ], site_cats ]\n";              print STDERR "proml::proml categories option value must be [ [ cat_rate1, ... ], site_categories ]\n";
165              return ();              return ();
166          }          }
167    
168          #  Rate values cannot have very many decimal places or proml can't read it:          #  Rate values cannot have very many decimal places or proml can't read it:
169    
170          @{$categories->[1]} = map { sprintf "%.6f", $_ } @{$categories->[1]};          @{$categories->[0]} = map { sprintf "%.6f", $_ } @{$categories->[0]};
171      }      }
172    
173      my $coef_of_var  = $options{ coef_of_var }      my $coef_of_var  = $options{ coef_of_var }
# Line 329  Line 332 
332      unlink 'outfile' if -f 'outfile';  # Just checking      unlink 'outfile' if -f 'outfile';  # Just checking
333      unlink 'outtree' if -f 'outtree';  # ditto      unlink 'outtree' if -f 'outtree';  # ditto
334    
335      &write_infile( @align ) or print STDERR "proml::proml: Could write infile\n"      &write_infile( @align ) or print STDERR "proml::proml: Could not write infile\n"
336                                 and chdir $cwd                                 and chdir $cwd
337                                 and return ();                                 and return ();
338    
339      # open( PROML, "| $program > /dev/null" ) or print STDERR "proml::proml: Could not open pipe to $program\n"      open( PROML, ">params" ) or print STDERR "proml::proml: Could not open command file for $program\n"
     open( PROML, ">params" ) or print STDERR "proml::proml: Could not open pipe to $program\n"  
340                                                 and chdir $cwd                                                 and chdir $cwd
341                                                 and return ();                                                 and return ();
342    
343    
344      #  Start sending optoins ot program:      #  Start writing options for program:
345    
346      if ( $categories )      if ( $categories )
347      {      {
348          &write_categories( $categories->[2] ) or print STDERR "proml::proml: Could not write categories\n"          &write_categories( $categories->[1] ) or print STDERR "proml::proml: Could not write categories\n"
349                                                   and chdir $cwd                                                   and chdir $cwd
350                                                   and return ();                                                   and return ();
351          print PROML "C\n",          print PROML "C\n",
352                      "$categories->[0]\n",                      scalar @{$categories->[0]}, "\n",
353                      join( ' ', @{ $categories->[1] } ), "\n";                      join( ' ', @{ $categories->[0] } ), "\n";
354      }      }
355    
356      if ( $invar_frac || $coef_of_var )      if ( $invar_frac || $coef_of_var )
# Line 370  Line 372 
372    
373      if ( @user_trees )      if ( @user_trees )
374      {      {
375          &write_intree( @user_trees ) or print STDERR "proml::proml: Could write intree\n"          &write_intree( @user_trees ) or print STDERR "proml::proml: Could not write intree\n"
376                                          and chdir $cwd                                          and chdir $cwd
377                                          and return ();                                          and return ();
378          print PROML "U\n";          print PROML "U\n";
# Line 380  Line 382 
382    
383      if ( $weights )      if ( $weights )
384      {      {
385          &write_weights( $weights ) or print STDERR "proml::proml: Could write weights\n"          &write_weights( $weights ) or print STDERR "proml::proml: Could not write weights\n"
386                                        and chdir $cwd                                        and chdir $cwd
387                                        and return ();                                        and return ();
388          print PROML "W\n";          print PROML "W\n";
389      }      }
390    
391      #  All the options are sent, try to lauch the run:      #  All the options are written, try to lauch the run:
392    
393      print PROML "Y\n";      print PROML "Y\n";
394    
# Line 427  Line 429 
429      #  Returned trees have our labels, and branch lengths that are in % change,      #  Returned trees have our labels, and branch lengths that are in % change,
430      #  not the more usual expected number per position:      #  not the more usual expected number per position:
431    
432      my @trees = map { gjonewicklib::newick_rescale_branches( $_, 0.01 );      my @trees = map { gjonewicklib::newick_relabel_nodes( $_, \%id ) }
                       gjonewicklib::newick_relabel_nodes( $_, \%id )  
                     }  
433                  @trees;                  @trees;
434    
435      if ( $tree_format =~ m/overbeek/i )      if ( $tree_format =~ m/overbeek/i )
# Line 446  Line 446 
446  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
447  #  A perl interface for using proml to estimate site-specific rates of change  #  A perl interface for using proml to estimate site-specific rates of change
448  #  #
449  #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )  #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree,  %proml_opts )
450    #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, \%proml_opts )
451  #  #
452  #     $categories = [ $n_categories, [ $rates ], $site_categories ];  #     $categories = [ [ $rate1, ... ], $site_categories ];
453  #  #
454  #  $alignment = [ [ id, def, seq ], ... ]  #  $alignment = [ [ id, def, seq ], ... ]
455  #             or  #             or
# Line 463  Line 464 
464  {  {
465      my ( $align, $tree, @proml_opts ) = @_;      my ( $align, $tree, @proml_opts ) = @_;
466    
467      my @align = @$align;      my ( $seq, $id );
468        my %local_id;
469        my $local_id = 'seq0000000';
470        my @align = map { $id = $_->[0];
471                          $local_id{ $id } = ++$local_id;
472                          $seq = $_->[-1];
473                          $seq =~ s/[BJOUZ]/X/gi;  # Bad letters go to X
474                          $seq =~ s/[^A-Z]/-/gi;   # Anything else becomes -
475                          [ $local_id, $seq ]
476                        } @$align;
477    
478      #  Make the tree into a gjonewick tree so that we can manipulate it:      #  Make the tree a gjonewick tree, uproot it, and change to the local ids.
479    
480      if ( ref( $tree->[0] ) ne 'ARRAY' )   # overbeek tree      if ( ref( $tree->[0] ) ne 'ARRAY' )   # overbeek tree
481      {      {
482          $tree = gjonewicklib::overbeek_to_gjonewick( $tree );          $tree = gjonewicklib::overbeek_to_gjonewick( $tree );
483      }      }
484        else
485        {
486            $tree = gjonewicklib::copy_newick_tree( $tree );
487        }
488    
489        $tree = gjonewicklib::uproot_newick( $tree ) if ! gjonewicklib::newick_is_unrooted( $tree );
490    
491        gjonewicklib::newick_relabel_nodes( $tree, \%local_id );
492    
493      #  The minimum rate will be 1/2 change per total tree branch length.      #  The minimum rate will be 1/2 change per total tree branch length.
494      #  This needs to be checked for proml.  The intent is that he optimal      #  This needs to be checked for proml.  The intent is that he optimal
# Line 478  Line 496 
496    
497      my $kmin = 1 / ( gjonewicklib::newick_tree_length( $tree ) || 1 );      my $kmin = 1 / ( gjonewicklib::newick_tree_length( $tree ) || 1 );
498    
     print STDERR "Length = ", gjonewicklib::newick_tree_length( $tree ), "; kmin = $kmin\n"; ## DEBUG ##  
   
499      #  Generate "rate variation" by rescaling the supplied tree.  We could use a      #  Generate "rate variation" by rescaling the supplied tree.  We could use a
500      #  finer grain estimator, then categorize the inferred values.  This might      #  finer grain estimator, then categorize the inferred values.  This might
501      #  work slightly better (this is what DNArates currently does).      #  work slightly better (this is what DNArates currently does).
# Line 499  Line 515 
515      #  Adjust (a copy of) the proml opts:      #  Adjust (a copy of) the proml opts:
516    
517      my %proml_opts = ( ref( $proml_opts[0] ) eq 'HASH' ) ? %{ $proml_opts[0] } : @proml_opts;      my %proml_opts = ( ref( $proml_opts[0] ) eq 'HASH' ) ? %{ $proml_opts[0] } : @proml_opts;
518      $proml_opts{ alpha        } =  undef;  
     $proml_opts{ categories   } =  0;  
     $proml_opts{ coef_of_var  } =  0;  
     $proml_opts{ gamma_bins   } =  0;  
     $proml_opts{ invar_frac   } =  0;  
     $proml_opts{ jumble_seed  } =  0;  
     $proml_opts{ n_jumble     } =  0;  
     $proml_opts{ rearrange    } =  0;  
519      $proml_opts{ user_lengths } =  1;      $proml_opts{ user_lengths } =  1;
520      $proml_opts{ user_trees   } = \@trees;      $proml_opts{ user_trees   } = \@trees;
521      $proml_opts{ tree_format  } = 'gjo';      $proml_opts{ tree_format  } = 'gjo';
522    
523      #  Work throught the sites, finding their optimal categories:      delete $proml_opts{ alpha       } if exists $proml_opts{ alpha       };
524        delete $proml_opts{ categories  } if exists $proml_opts{ categories  };
525        delete $proml_opts{ coef_of_var } if exists $proml_opts{ coef_of_var };
526        delete $proml_opts{ gamma_bins  } if exists $proml_opts{ gamma_bins  };
527        delete $proml_opts{ invar_frac  } if exists $proml_opts{ invar_frac  };
528        delete $proml_opts{ jumble_seed } if exists $proml_opts{ jumble_seed };
529        delete $proml_opts{ n_jumble    } if exists $proml_opts{ n_jumble    };
530        delete $proml_opts{ rearrange   } if exists $proml_opts{ rearrange   };
531    
532        #  Work throught the sites, finding their optimal rates/categories:
533    
534      my @categories;      my @categories;
535      my @weights;      my @weights;
# Line 535  Line 553 
553                             map  { [ $_, @{ shift @results }[1] ] }  # get the likelihoods                             map  { [ $_, @{ shift @results }[1] ] }  # get the likelihoods
554                             @cat_vals;                             @cat_vals;
555    
556              printf STDERR "%6d  %2d => %12.4f\n", $i+1, @$best; ## DEBUG ##  #           printf STDERR "%6d  %2d => %12.4f\n", $i+1, @$best; ## DEBUG ##
557              push @categories, $best->[0];              push @categories, $best->[0];
558              push @weights,    1;              push @weights,    1;
559          }          }
# Line 556  Line 574 
574    
575      #  Return category and weight data:      #  Return category and weight data:
576    
577      ( [ scalar @rates, \@rates, join( '', @categories ) ], join( '', @weights ) )      ( [ \@rates, join( '', @categories ) ], join( '', @weights ) )
578  }  }
579    
580    
# Line 605  Line 623 
623    
624  sub read_outfile  sub read_outfile
625  {  {
     my @likelihoods;  
626      open( OUTFILE, '<outfile' ) or return ();      open( OUTFILE, '<outfile' ) or return ();
627      while ( defined( $_ = <OUTFILE> ) )      my @likelihoods = map  { chomp; s/.* //; $_ }
628      {                        grep { /^Ln Likelihood/ }
629          next if ! m/^Ln /;                        <OUTFILE>;
         chomp;  
         s/.* //;  
         push @likelihoods, $_;  
     }  
630      close( OUTFILE );      close( OUTFILE );
631      return @likelihoods;      return @likelihoods;
632  }  }

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.6

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3