[Bio] / StandaloneTools / get_fasta_for_tbl_entries.pl Repository:
ViewVC logotype

View of /StandaloneTools/get_fasta_for_tbl_entries.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (download) (as text) (annotate) (vendor branch)
Mon Jun 21 21:46:19 2004 UTC (15 years, 3 months ago) by olson
Branch: MAIN, gordon
CVS Tags: gordon1, HEAD
Changes since 1.1: +0 -0 lines
Initial import of gordon's utilities.

#!/usr/bin/perl -w

use strict;

# usage: get_fasta_for_tbl_entries Contigs [Stops] < tbl > fasta

my($contigs,$tbl,$code,$stop,$dna,$dnaR,$prot);
my(%contigs,$id,$seq,$beg,$end,$contig);

($contigs = shift @ARGV) 
    || die "usage: get_fasta_for_tbl_entries Contigs [Stops] < tbl > fasta";

$code = &standard_genetic_code;
my $stops = "";
if (@ARGV > 0) 
{ 
    $stops = shift @ARGV; 
    $stops = "\'$stops\'";
    foreach $stop (("TGA","TAA","TAG"))
    {
	if ($stops !~ /$stop/i)
	{
	    $code->{$stop} = "X";
	}
    }
}

open(OLD,$contigs) || die "aborted";
$/ = "\n>";
while (defined($_ = <OLD>))
{
    chomp;
    if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s)
    {
	$id  =  $1;
	$seq =  $2;
	$seq =~ s/\s//g;
	$contigs{$id} = $seq;
    }
}
close(OLD);
$/ = "\n";

while (defined($_ = <STDIN>))
{
    if ($_ =~ /^(\S+)\t([^,\t ]+)\_(\d+)_(\d+)/)
    {
	$id = $1;
	$contig = $2;
	$beg = $3;
	$end = $4;
	$seq = $contigs{$contig};
	if ($beg < $end)
	{
	    $dna = substr($seq,$beg-1,($end+1-$beg));
	    $prot = &translate($dna,$code,1);
	}
	else
	{
	    $dna = substr($seq,$end-1,($beg+1-$end));
	    $dnaR = &rev_comp(\$dna);
	    $prot = &translate($$dnaR,$code,1);
	}
	print ">$id\n$prot\n";
    }
    else
    {
	die "bad: $_";
    }
}

sub standard_genetic_code 
{
    my $code = {};

    $code->{"AAA"} = "K";
    $code->{"AAC"} = "N";
    $code->{"AAG"} = "K";
    $code->{"AAT"} = "N";
    $code->{"ACA"} = "T";
    $code->{"ACC"} = "T";
    $code->{"ACG"} = "T";
    $code->{"ACT"} = "T";
    $code->{"AGA"} = "R";
    $code->{"AGC"} = "S";
    $code->{"AGG"} = "R";
    $code->{"AGT"} = "S";
    $code->{"ATA"} = "I";
    $code->{"ATC"} = "I";
    $code->{"ATG"} = "M";
    $code->{"ATT"} = "I";
    $code->{"CAA"} = "Q";
    $code->{"CAC"} = "H";
    $code->{"CAG"} = "Q";
    $code->{"CAT"} = "H";
    $code->{"CCA"} = "P";
    $code->{"CCC"} = "P";
    $code->{"CCG"} = "P";
    $code->{"CCT"} = "P";
    $code->{"CGA"} = "R";
    $code->{"CGC"} = "R";
    $code->{"CGG"} = "R";
    $code->{"CGT"} = "R";
    $code->{"CTA"} = "L";
    $code->{"CTC"} = "L";
    $code->{"CTG"} = "L";
    $code->{"CTT"} = "L";
    $code->{"GAA"} = "E";
    $code->{"GAC"} = "D";
    $code->{"GAG"} = "E";
    $code->{"GAT"} = "D";
    $code->{"GCA"} = "A";
    $code->{"GCC"} = "A";
    $code->{"GCG"} = "A";
    $code->{"GCT"} = "A";
    $code->{"GGA"} = "G";
    $code->{"GGC"} = "G";
    $code->{"GGG"} = "G";
    $code->{"GGT"} = "G";
    $code->{"GTA"} = "V";
    $code->{"GTC"} = "V";
    $code->{"GTG"} = "V";
    $code->{"GTT"} = "V";
    $code->{"TAA"} = "*";
    $code->{"TAC"} = "Y";
    $code->{"TAG"} = "*";
    $code->{"TAT"} = "Y";
    $code->{"TCA"} = "S";
    $code->{"TCC"} = "S";
    $code->{"TCG"} = "S";
    $code->{"TCT"} = "S";
    $code->{"TGA"} = "*";
    $code->{"TGC"} = "C";
    $code->{"TGG"} = "W";
    $code->{"TGT"} = "C";
    $code->{"TTA"} = "L";
    $code->{"TTC"} = "F";
    $code->{"TTG"} = "L";
    $code->{"TTT"} = "F";
    
    return $code;
}

sub translate 
{
    my( $dna,$code,$start) = @_;
    my( $i,$j,$ln );
    my( $x,$y );
    my( $prot );

    if (! defined($code))
    {
	$code = &FIG::standard_genetic_code;
    }
    $ln = length($dna);
    $prot = "X" x ($ln/3);
    $dna =~ tr/a-z/A-Z/;

    for ($i=0,$j=0; ($i < ($ln-2)); $i += 3,$j++)
    {
	$x = substr($dna,$i,3);
        if ($y = $code->{$x})
        {
	    substr($prot,$j,1) = $y;
        }
    }
    
    if (($start) && ($ln >= 3) && (substr($dna,0,3) =~ /^[GT]TG$/))
    {
	substr($prot,0,1) = 'M';
    }
    return $prot;
}

sub reverse_comp 
{
    my($seq) = @_;

    return ${&rev_comp(\$seq)};
}

sub rev_comp 
{
    my( $seqP ) = @_;
    my( $rev  );

    $rev =  reverse( $$seqP );
    $rev =~ tr/a-z/A-Z/;
    $rev =~ tr/ACGTUMRWSYKBDHV/TGCAAKYWSRMVHDB/;
    return \$rev;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3