[Bio] / FigKernelScripts / match_to_genome_frequencies.pl Repository:
ViewVC logotype

View of /FigKernelScripts/match_to_genome_frequencies.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Mon Sep 17 22:56:58 2012 UTC (7 years, 2 months ago) by golsen
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.2: +10 -12 lines
Updated to use GenomeCodonUsages.pm

#!/usr/bin/env perl -w
#
#  match_to_genome_frequencies < freq
#
use strict;
use gjocodonlib;
use GenomeCodonUsages;

my $usage = <<'End_of_Usage';

Find the closest match(es) of one or more sets of codon usage frequencies in
a compilation of genome codon usage frequencies.

Usage:  match_to_genome_frequencies    [options] < freqs > best_matching_genomes

        match_to_genome_frequencies -f [options] < freqs > best_matching_freqs

Options:

   -a           Include nonnative (alien) gene modal codon usages (if available)
   -d distfile  Report codon frequencies to STDOUT and distances to distfile
   -f           Report codon usage frequencies of best matching genomic usages
                    (default is text report of genome ids and names)
   -h           Include high expression codon usages (matched as point estimate)
   -n number    Number of top matches to report (D = 3)

End_of_Usage

my $alien    = 0;
my $distfile = undef;
my $high_exp = 0;
my $freqs    = 0;
my $labels   = undef;  # Legacy function
my $n_top    = 3;

while ( @ARGV && $ARGV[0] =~ s/^-// )
{
    $_ = shift;
    if ( s/^d// ) { $distfile = /./ ? $_ : shift; next }
    if ( s/^l// ) { $labels   = /./ ? $_ : shift; next }
    if ( s/^n// ) { $n_top    = /./ ? $_ : shift; next }

    if ( s/a//g ) { $alien    = 1 }
    if ( s/f//g ) { $freqs    = 1 }
    if ( s/h//g ) { $high_exp = 1 }
    if ( $_ )
    {
        print STDERR "Bad flag: '$_'\n";
        print STDERR $usage;
        exit;
    }
}

if ( $labels )
{
    open LBL, ">$labels" or die "Could not open label file $labels\n$usage";
}

my $distFH;
if ( $distfile )
{
    open $distFH, ">$distfile" or die "Could not open distance file $distfile\n$usage";
    $freqs = 1;
}
elsif ( ! $freqs )
{
    $distFH = \*STDOUT;
}

my @genomes = map { my $name = "$_->{gid} $_->{gname}";
                                                       [ "$name -- Genome mode",          $_->{ modal } ],
                    ( $high_exp && $_->{ high_expr } ? [ "$name -- High-expression mode", $_->{ high_expr } ] : () ),
                    ( $alien    && $_->{ nonnative } ? [ "$name -- Nonnative mode",       $_->{ nonnative } ] : () )
                   }
              values %GenomeCodonUsages::genome_freqs;

my $n = 'freq_00000';
my @freqs = map  { $n++; $_->[2] ||= $n; $_ }
            grep { $_->[0] }
            gjocodonlib::read_frequencies_scr_label();

my %have;
foreach ( @freqs )
{
    my ( $freq, undef, $name ) = @$_;

    my @dists = sort { $a->[1] <=> $b->[1] }
                map { [ $_, gjocodonlib::codon_freq_distance_2( $freq, $_->[1] ) ] }
                @genomes;
    splice @dists, $n_top if @dists > $n_top;

    if ( $freqs )
    {
        report( $name, $freq );
        foreach ( @dists ) { report( @{ $_->[0] } ) }
    }

    if ( $distFH )
    {
        print $distFH "$name:\n";
        print $distFH map { sprintf "%9.4f\t%s\n", $_->[1], $_->[0]->[0] } @dists;
        print $distFH "\n";
    }
}

close $distFH if $distfile;
close LBL     if $labels;
exit;


sub report
{
    my ( $id, $freq ) = @_;
    return if $have{ $id }++;
    gjocodonlib::report_frequencies( $freq, $id );
    if ( $labels ) { my $lbl = $id; $lbl =~ s/ /\t/; print LBL "$lbl\n" }
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3