[Bio] / FigMetagenomeTools / genbank2table.pl Repository:
ViewVC logotype

View of /FigMetagenomeTools/genbank2table.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (download) (as text) (annotate) (vendor branch)
Mon Feb 19 17:15:26 2007 UTC (13 years ago) by olson
Branch: x, MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_rel_2008_0806, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_rel_2008_0923, mgrast_release_3_0, mgrast_dev_03252011, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, mgrast_rel_2008_0919, mgrast_rel_2008_1110, myrast_33, mgrast_rel_2008_0917, mgrast_dev_04052011, mgrast_dev_02222011, y, HEAD
Changes since 1.1: +0 -0 lines
Initial import

#!/usr/bin/env perl

# get proteins out of a genbank file

use Bio::SeqIO;
use strict;

my $usage=<<EOF;
$0 <list of genbankfiles>

EOF

die $usage unless ($ARGV[0]);

open(FA, ">fasta") || die "Can't open fasta";
open(PR, ">function") || die "Can't open function";
my $seedid=&seedid;
if ($seedid) {open(CO, ">contigs") || die "Can't open contigs"}

my $c;
foreach my $file (@ARGV)
{
	my $sio=Bio::SeqIO->new(-file=>$file, -format=>'genbank');
	while (my $seq=$sio->next_seq) {
		my $seqname=$seq->display_name;
		print STDERR "Parsing $seqname\n";
		if ($seedid->{$seqname}) {print CO join("\t", $seedid->{$seqname}, $seqname)}
		foreach my $feature ($seq->top_SeqFeatures()) {
			#my $ftype;
			#eval {$ftype=$feature->primary()};

			#next unless ($ftype eq "CDS");
			$c++;
			my $id; # what we will call the sequence
			my ($trans, $gi, $geneid, $prod, $locus, $np);

			eval {$trans = join " ", $feature->each_tag_value("translation")};
			eval {$np = join " ", $feature->each_tag_value("protein_id")};
			if ($trans && !$np) {print STDERR "No NP for $trans. Skipped\n"; next}
			elsif (!$trans && $np) {print STDERR "No translation for $np. Skipped\n"; next}
			next unless ($trans && $np);

			eval {
				foreach my $xr ($feature->each_tag_value("db_xref")) 
				{
					($xr =~ /GI/) ? ($gi = $xr) : 1;
					($xr =~ /GeneID/) ? ($geneid = $xr) : 1;
				}
			};

			eval {$locus = join " ", $feature->each_tag_value("locus_tag")};
			eval {$prod  = join " ", $feature->each_tag_value("product")};

			my $oids;
			($locus)   && ($oids.="locus:$locus;");
			($geneid)  && ($oids.="$geneid;");
			($gi)      && ($oids.="$gi;");
			$oids =~ s/\;$//;

			if (0) 
			{
				unless ($locus) {print STDERR "No locus for $np\n"; $locus=""}
				unless ($geneid){print STDERR "No geneid for $np\n"; $geneid=""}
				unless ($gi)    {print STDERR "No gi for $np\n"; $gi=""} 
			}
			unless ($prod)  {print STDERR "No product for $np\n"; $prod="hypothetical protein"}
			print FA ">refseq|$np\n$trans\n";
			print PR "refseq|$np\t$prod\t$oids\n";
		}
	}
}


sub seedid {
	return unless (-e "../NCs.txt");
	open(IN, "../NCs.txt");
	my %seedid;
	while (<IN>) 
	{
		chomp;
		my ($id, $name, @acc)=split /\t/;
		map {$seedid{$_}=$id} @acc;
	}
	return \%seedid;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3