[Bio] / FigKernelScripts / pg_get_codon_usage_stats.pl Repository:
ViewVC logotype

View of /FigKernelScripts/pg_get_codon_usage_stats.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Mar 20 14:11:11 2013 UTC (6 years, 8 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
add codon usage

use strict;
use Data::Dumper;
use gjoseqlib;
use Getopt::Long;
use SeedEnv;

my $usage = "usage: pg_get_codon_usage -d Data\n";
my $dataD;
my $rc  = GetOptions('d=s' => \$dataD,);

if ((! $rc) || (! -d $dataD)) { print STDERR $usage; exit }

if (! -d "$dataD/CodonUsage")
{
    mkdir("$dataD/CodonUsage",0777);
}
my $funcs = &load_funcs($dataD);
foreach $_ (`cut -f3,4 $dataD/genomes.with.job.and.genomeID`)
{
    chop;
    my($job,$genome) = split(/\t/,$_);
    print STDERR "job=$job genome=$genome\n";
    if (! -s "$dataD/CodonUsage/$genome")
    {
	mkdir("$dataD/CodonUsage/$genome",0777);
	my $contigs = &load_contigs($job,$genome);
	my $tmp = "tmp.$$.codon.usage";
	open(TMP,">$tmp") || die "could not open $tmp";
	foreach $_ (`cat /vol/rast-prod/jobs/$job/rp/$genome/Features/peg/tbl`)
	{
	    if ($_ =~ /^(\S+)\t(\S+)/)
	    {
		my($peg,$loc) = ($1,$2);
		if ($loc =~ /^(\S+)_(\d+)_(\d+)$/)
		{
		    my($contig,$beg,$end) = ($1,$2,$3);
		    my $dna;
		    my $contig_dna = $contigs->{$contig};
		    if ($contig_dna && ($beg > 0) && ($end > 0) && ($beg < length($contig_dna)) && ($end < length($contig_dna)))
		    {
			if ($beg < $end)
			{
			    $dna = substr($contig_dna,$beg-1,($end+1-$beg));
			}
			else
			{
			    $dna = &SeedUtils::rev_comp(substr($contig_dna,$end-1,($beg+1-$end)));
			}
			my $f = $funcs->{$peg};
			print TMP ">$peg $f\n$dna\n";
		    }
		}
	    }
	}
	close(TMP);
	if (-s $tmp)
	{
	    &SeedUtils::run("match_to_axes -f $tmp > $dataD/CodonUsage/$genome/$genome.html");
	}
	unlink($tmp);
    }
}

sub load_contigs {
    my($job,$genome) = @_;

    my $file = "/vol/rast-prod/jobs/$job/rp/$genome/contigs";
    my @seqs = &gjoseqlib::read_fasta($file);
    my %contigs = map { ($_->[0] => $_->[2]) } @seqs;
    return \%contigs;
}

sub load_funcs {
    my($dataD) = @_;

    my $to_func = {};

    foreach my $job (`cut -f 3 $dataD/genomes.with.job`)
    {
	chop $job;
	foreach $_ (`cat /vol/rast-prod/jobs/$job/rp/*/proposed*functions`)
	{
	    if ($_ =~ /^(\S+)\t(\S.*\S)/)
	    {
		$to_func->{$1} = $2;
	    }
	}
    }
    return $to_func;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3