[Bio] / FigKernelPackages / proml.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/proml.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Sat Jan 13 03:22:44 2007 UTC revision 1.7, Wed Jan 31 22:38:39 2007 UTC
# Line 15  Line 15 
15  #  #
16  #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )  #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )
17  #  #
18  #     $categories = [ $n_categories, [ $rates ], $site_categories ];  #     $categories = [ [ $rate1, ... ], $site_categories ];
19  #  #
20  #===============================================================================  #===============================================================================
21  #  #
# Line 33  Line 33 
33  #    For proml:  #    For proml:
34  #      alignment    => \@alignment    the way to supply the alignment as an option, rather than first param  #      alignment    => \@alignment    the way to supply the alignment as an option, rather than first param
35  #      alpha        => float          alpha parameter of gamma distribution (0.5 - inf)  #      alpha        => float          alpha parameter of gamma distribution (0.5 - inf)
36  #      categories   => [ n, [ rates ], site_categories ]  #      categories   => [ [ rate1, ... ], site_categories ]
37  #      coef_of_var  => float          1/sqrt(alpha) for gamma distribution (D = 0)  #      coef_of_var  => float          1/sqrt(alpha) for gamma distribution (D = 0)
38  #      gamma_bins   => int            number of rate categories used to approximate gamma (D=5)  #      gamma_bins   => int            number of rate categories used to approximate gamma (D=5)
39  #      global       => bool           global rearrangements  #      global       => bool           global rearrangements
# Line 42  Line 42 
42  #      model        => model          evolution model JTT (D) | PMB | PAM  #      model        => model          evolution model JTT (D) | PMB | PAM
43  #      n_jumble     => int            number of jumbles  #      n_jumble     => int            number of jumbles
44  #      persistance  => float          persistance length of rate category  #      persistance  => float          persistance length of rate category
45  #      rate_hmm     => [ n_rates, [ rates ], [ probabilies ] ]  #      rate_hmm     => [ [ rate, prior_prob ] ... ]   # not implimented
46  #      rearrange    => [ trees ]      rearrange user trees  #      rearrange    => [ trees ]      rearrange user trees
47  #      slow         => bool           more accurate but slower search (D = 0)  #      slow         => bool           more accurate but slower search (D = 0)
48  #      user_lengths => bool           use supplied branch lengths  #      user_lengths => bool           use supplied branch lengths
# Line 63  Line 63 
63  #  Options that do not require other data:  #  Options that do not require other data:
64  #    G (global search toggle)  #    G (global search toggle)
65  #    L (user lengths toggle)  #    L (user lengths toggle)
66  #    P (JTT / PAM toggle)  #    P (JTT / PMB / PAM cycle)
67  #    S (slow and accurate)  #    S (slow and accurate)
68  #    U (requires intree file)  #    U (requires intree file)
69  #    W (requires weights file)  #    W (requires weights file)
# Line 98  Line 98 
98  use gjonewicklib qw( gjonewick_to_overbeek  use gjonewicklib qw( gjonewick_to_overbeek
99                       newick_is_unrooted                       newick_is_unrooted
100                       newick_relabel_nodes                       newick_relabel_nodes
101                         newick_rescale_branches
102                       newick_tree_length                       newick_tree_length
103                       overbeek_to_gjonewick                       overbeek_to_gjonewick
104                       parse_newick_tree_str                       parse_newick_tree_str
# Line 148  Line 149 
149      #  Process proml options:      #  Process proml options:
150      #---------------------------------------------------------------------------      #---------------------------------------------------------------------------
151    
152      my $categories   = $options{ categories };  # [ n_cat, [ cat_rates ], site_cats ]      #  [ [ cat_rate1, ... ], site_categories ]
153        #  Original format expected first field to be number of categories (which
154        #  is redundant).  Handling that form is what the shift if all about.
155    
156        my $categories   = $options{ categories };  # [ [ cat_rate1, ... ], site_categories ]
157      if ( $categories )      if ( $categories )
158      {      {
159          if ( ref( $categories ) ne 'ARRAY'          if ( ref( $categories ) ne 'ARRAY'
160            || @$categories != 3            || ! ( ( @$categories == 2 ) || ( ( @$categories == 3 ) && ( shift @$categories ) ) )
161            || $categories->[0] < 1 || $categories->[0] > 9            || ref( $categories->[0] ) ne 'ARRAY'
           || ref( $categories->[1] ) ne 'ARRAY'  
           || @{$categories->[1]} != $categories->[0]  
162             )             )
163          {          {
164              print STDERR "proml::proml categories option value must be [ n_cat, [ cat_rates ], site_cats ]\n";              print STDERR "proml::proml categories option value must be [ [ cat_rate1, ... ], site_categories ]\n";
165              return ();              return ();
166          }          }
167    
168          #  Rate values cannot have very many decimal places or proml can't read it:          #  Rate values cannot have very many decimal places or proml can't read it:
169    
170          @{$categories->[1]} = map { sprintf "%.6f", $_ } @{$categories->[1]};          @{$categories->[0]} = map { sprintf "%.6f", $_ } @{$categories->[0]};
171      }      }
172    
173      my $coef_of_var  = $options{ coef_of_var }      my $coef_of_var  = $options{ coef_of_var }
# Line 329  Line 332 
332      unlink 'outfile' if -f 'outfile';  # Just checking      unlink 'outfile' if -f 'outfile';  # Just checking
333      unlink 'outtree' if -f 'outtree';  # ditto      unlink 'outtree' if -f 'outtree';  # ditto
334    
335      &write_infile( @align ) or print STDERR "proml::proml: Could write infile\n"      &write_infile( @align ) or print STDERR "proml::proml: Could not write infile\n"
336                                 and chdir $cwd                                 and chdir $cwd
337                                 and return ();                                 and return ();
338    
339      # open( PROML, "| $program > /dev/null" ) or print STDERR "proml::proml: Could not open pipe to $program\n"      open( PROML, ">params" ) or print STDERR "proml::proml: Could not open command file for $program\n"
     open( PROML, ">params" ) or print STDERR "proml::proml: Could not open pipe to $program\n"  
340                                                 and chdir $cwd                                                 and chdir $cwd
341                                                 and return ();                                                 and return ();
342    
343    
344      #  Start sending optoins ot program:      #  Start writing options for program:
345    
346      if ( $categories )      if ( $categories )
347      {      {
348          &write_categories( $categories->[2] ) or print STDERR "proml::proml: Could not write categories\n"          &write_categories( $categories->[1] ) or print STDERR "proml::proml: Could not write categories\n"
349                                                   and chdir $cwd                                                   and chdir $cwd
350                                                   and return ();                                                   and return ();
351          print PROML "C\n",          print PROML "C\n",
352                      "$categories->[0]\n",                      scalar @{$categories->[0]}, "\n",
353                      join( ' ', @{ $categories->[1] } ), "\n";                      join( ' ', @{ $categories->[0] } ), "\n";
354      }      }
355    
356      if ( $invar_frac || $coef_of_var )      if ( $invar_frac || $coef_of_var )
# Line 366  Line 368 
368      print PROML "P\n"    if $model =~ m/PMB/i;      print PROML "P\n"    if $model =~ m/PMB/i;
369      print PROML "P\nP\n" if $model =~ m/PAM/i;      print PROML "P\nP\n" if $model =~ m/PAM/i;
370    
     print PROML "S\n" if $slow;  
   
371      if ( @user_trees )      if ( @user_trees )
372      {      {
373          &write_intree( @user_trees ) or print STDERR "proml::proml: Could write intree\n"          &write_intree( @user_trees ) or print STDERR "proml::proml: Could not write intree\n"
374                                          and chdir $cwd                                          and chdir $cwd
375                                          and return ();                                          and return ();
376          print PROML "U\n";          print PROML "U\n";
377          print PROML "V\n" if $rearrange || $global;          print PROML "V\n" if $rearrange || $global;
378          print PROML "L\n" if $user_lengths && ! $rearrange && ! $global;          print PROML "L\n" if $user_lengths && ! $rearrange && ! $global;
379      }      }
380        elsif ( $slow )  # Slow and user trees are mutually exclusive
381        {
382            print PROML "S\n";
383        }
384    
385      if ( $weights )      if ( $weights )
386      {      {
387          &write_weights( $weights ) or print STDERR "proml::proml: Could write weights\n"          &write_weights( $weights ) or print STDERR "proml::proml: Could not write weights\n"
388                                        and chdir $cwd                                        and chdir $cwd
389                                        and return ();                                        and return ();
390          print PROML "W\n";          print PROML "W\n";
391      }      }
392    
393      #  All the options are sent, try to lauch the run:      #  All the options are written, try to launch the run:
394    
395      print PROML "Y\n";      print PROML "Y\n";
396    
# Line 404  Line 408 
408          print PROML "$gamma_bins\n";          print PROML "$gamma_bins\n";
409          print PROML "$invar_frac\n" if $invar_frac;          print PROML "$invar_frac\n" if $invar_frac;
410      }      }
411      elsif ( $user_trees )  
412        if ( $user_trees )
413      {      {
414          print PROML "13\n";     #  Random number seed of unknown use          print PROML "13\n";     #  Random number seed of unknown use
415      }      }
# Line 427  Line 432 
432      #  Returned trees have our labels, and branch lengths that are in % change,      #  Returned trees have our labels, and branch lengths that are in % change,
433      #  not the more usual expected number per position:      #  not the more usual expected number per position:
434    
435      my @trees = map { gjonewicklib::newick_rescale_branches( $_, 0.01 );      my @trees = map { gjonewicklib::newick_relabel_nodes( $_, \%id ) }
                       gjonewicklib::newick_relabel_nodes( $_, \%id )  
                     }  
436                  @trees;                  @trees;
437    
438      if ( $tree_format =~ m/overbeek/i )      if ( $tree_format =~ m/overbeek/i )
# Line 446  Line 449 
449  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
450  #  A perl interface for using proml to estimate site-specific rates of change  #  A perl interface for using proml to estimate site-specific rates of change
451  #  #
452  #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, proml_opts )  #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree,  %proml_opts )
453    #     ( $categories, $weights ) = estimate_protein_site_rates( \@align, $tree, \%proml_opts )
454  #  #
455  #     $categories = [ $n_categories, [ $rates ], $site_categories ];  #     $categories = [ [ $rate1, ... ], $site_categories ];
456  #  #
457  #  $alignment = [ [ id, def, seq ], ... ]  #  $alignment = [ [ id, def, seq ], ... ]
458  #             or  #             or
# Line 463  Line 467 
467  {  {
468      my ( $align, $tree, @proml_opts ) = @_;      my ( $align, $tree, @proml_opts ) = @_;
469    
470      my @align = @$align;      my ( $seq, $id );
471        my %local_id;
472        my $local_id = 'seq0000000';
473        my @align = map { $id = $_->[0];
474                          $local_id{ $id } = ++$local_id;
475                          $seq = $_->[-1];
476                          $seq =~ s/[BJOUZ]/X/gi;  # Bad letters go to X
477                          $seq =~ s/[^A-Z]/-/gi;   # Anything else becomes -
478                          [ $local_id, $seq ]
479                        } @$align;
480    
481      #  Make the tree into a gjonewick tree so that we can manipulate it:      #  Make the tree a gjonewick tree, uproot it, and change to the local ids.
482    
483      if ( ref( $tree->[0] ) ne 'ARRAY' )   # overbeek tree      if ( ref( $tree->[0] ) ne 'ARRAY' )   # overbeek tree
484      {      {
485          $tree = gjonewicklib::overbeek_to_gjonewick( $tree );          $tree = gjonewicklib::overbeek_to_gjonewick( $tree );
486      }      }
487        else
488        {
489            $tree = gjonewicklib::copy_newick_tree( $tree );
490        }
491    
492        $tree = gjonewicklib::uproot_newick( $tree ) if ! gjonewicklib::newick_is_unrooted( $tree );
493    
494        gjonewicklib::newick_relabel_nodes( $tree, \%local_id );
495    
496      #  The minimum rate will be 1/2 change per total tree branch length.      #  The minimum rate will be 1/2 change per total tree branch length.
497      #  This needs to be checked for proml.  The intent is that he optimal      #  This needs to be checked for proml.  The intent is that he optimal
# Line 478  Line 499 
499    
500      my $kmin = 1 / ( gjonewicklib::newick_tree_length( $tree ) || 1 );      my $kmin = 1 / ( gjonewicklib::newick_tree_length( $tree ) || 1 );
501    
     print STDERR "Length = ", gjonewicklib::newick_tree_length( $tree ), "; kmin = $kmin\n"; ## DEBUG ##  
   
502      #  Generate "rate variation" by rescaling the supplied tree.  We could use a      #  Generate "rate variation" by rescaling the supplied tree.  We could use a
503      #  finer grain estimator, then categorize the inferred values.  This might      #  finer grain estimator, then categorize the inferred values.  This might
504      #  work slightly better (this is what DNArates currently does).      #  work slightly better (this is what DNArates currently does).
# Line 499  Line 518 
518      #  Adjust (a copy of) the proml opts:      #  Adjust (a copy of) the proml opts:
519    
520      my %proml_opts = ( ref( $proml_opts[0] ) eq 'HASH' ) ? %{ $proml_opts[0] } : @proml_opts;      my %proml_opts = ( ref( $proml_opts[0] ) eq 'HASH' ) ? %{ $proml_opts[0] } : @proml_opts;
521      $proml_opts{ alpha        } =  undef;  
     $proml_opts{ categories   } =  0;  
     $proml_opts{ coef_of_var  } =  0;  
     $proml_opts{ gamma_bins   } =  0;  
     $proml_opts{ invar_frac   } =  0;  
     $proml_opts{ jumble_seed  } =  0;  
     $proml_opts{ n_jumble     } =  0;  
     $proml_opts{ rearrange    } =  0;  
522      $proml_opts{ user_lengths } =  1;      $proml_opts{ user_lengths } =  1;
523      $proml_opts{ user_trees   } = \@trees;      $proml_opts{ user_trees   } = \@trees;
524      $proml_opts{ tree_format  } = 'gjo';      $proml_opts{ tree_format  } = 'gjo';
525    
526      #  Work throught the sites, finding their optimal categories:      delete $proml_opts{ alpha       } if exists $proml_opts{ alpha       };
527        delete $proml_opts{ categories  } if exists $proml_opts{ categories  };
528        delete $proml_opts{ coef_of_var } if exists $proml_opts{ coef_of_var };
529        delete $proml_opts{ gamma_bins  } if exists $proml_opts{ gamma_bins  };
530        delete $proml_opts{ invar_frac  } if exists $proml_opts{ invar_frac  };
531        delete $proml_opts{ jumble_seed } if exists $proml_opts{ jumble_seed };
532        delete $proml_opts{ n_jumble    } if exists $proml_opts{ n_jumble    };
533        delete $proml_opts{ rearrange   } if exists $proml_opts{ rearrange   };
534    
535        #  Work throught the sites, finding their optimal rates/categories:
536    
537      my @categories;      my @categories;
538      my @weights;      my @weights;
# Line 535  Line 556 
556                             map  { [ $_, @{ shift @results }[1] ] }  # get the likelihoods                             map  { [ $_, @{ shift @results }[1] ] }  # get the likelihoods
557                             @cat_vals;                             @cat_vals;
558    
559              printf STDERR "%6d  %2d => %12.4f\n", $i+1, @$best; ## DEBUG ##  #           printf STDERR "%6d  %2d => %12.4f\n", $i+1, @$best; ## DEBUG ##
560              push @categories, $best->[0];              push @categories, $best->[0];
561              push @weights,    1;              push @weights,    1;
562          }          }
# Line 556  Line 577 
577    
578      #  Return category and weight data:      #  Return category and weight data:
579    
580      ( [ scalar @rates, \@rates, join( '', @categories ) ], join( '', @weights ) )      ( [ \@rates, join( '', @categories ) ], join( '', @weights ) )
581  }  }
582    
583    
# Line 605  Line 626 
626    
627  sub read_outfile  sub read_outfile
628  {  {
     my @likelihoods;  
629      open( OUTFILE, '<outfile' ) or return ();      open( OUTFILE, '<outfile' ) or return ();
630      while ( defined( $_ = <OUTFILE> ) )      my @likelihoods = map  { chomp; s/.* //; $_ }
631      {                        grep { /^Ln Likelihood/ }
632          next if ! m/^Ln /;                        <OUTFILE>;
         chomp;  
         s/.* //;  
         push @likelihoods, $_;  
     }  
633      close( OUTFILE );      close( OUTFILE );
634      return @likelihoods;      return @likelihoods;
635  }  }

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3