[Bio] / FigKernelScripts / export_all_viruses.pl Repository:
ViewVC logotype

View of /FigKernelScripts/export_all_viruses.pl

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.1 - (download) (as text) (annotate)
Fri May 3 17:29:19 2013 UTC (6 years, 6 months ago) by redwards
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
 a set of scripts for adding phage genomes to the seed


use strict; 
use LWP::Simple qw/get getstore/;
use Data::Dumper;
use FIG;
my $fig=new FIG;
use Phage;
my $phantome=new Phage;

# Get the virus data from genbank and make a single file of it
# the virus data is here: http://www.ncbi.nlm.nih.gov/genomes/GenomesGroup.cgi?taxid=10239&opt=Virus&sort=genome
# and this can be acquired as a tab separated text file.
# At the moment we download everything to a directory and then at the end we tar that up. We can also run it as a single file, I suppose

my @date=localtime(time);

my $baseoutput = "/var/www/phantome/Downloads/Viruses/";
my $dateoutput = sprintf("%04d_%02d_%02d", $date[5], $date[4], $date[3]);
my $outputdir = $baseoutput.$dateoutput;
unless (-e $outputdir) {mkdir($outputdir, 0755) || die "can't make $outputdir"}

my $virus_url = 'http://www.ncbi.nlm.nih.gov/genomes/GenomesGroup.cgi?opt=virus&taxid=10239&sort=Genome&cmd=download';
my $efetch_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&retmode=text&rettype=gb&tool=rob_virus&email=redwards@salmonella.org&id=';

my $lasttime=time;

open(IN, "<", \get($virus_url)) || die "can't open pipe";

my $current;
my $ids;
while (<IN>) {
	next if (/^\#/);
	if (/^\s+/) {
		while (s/(NC_\d+)//) {push @{$ids->{$current}}, $1}
	my @a=split /\t/;
	if ($a[0]) {$current=$a[0]}
	if ($a[1] =~ /NC/) {push @{$ids->{$current}}, $a[1]}
close IN;

# if it is a phage genome, we should have a better annotation, so use that
# get the current refseq ids
my %phageID;
foreach my $phage ($phantome->phages()) {
	if (-e $FIG_Config::organisms."/$phage/REFSEQ_ID") {
		open(IN, $FIG_Config::organisms."/$phage/REFSEQ_ID") || die "can't open ".$FIG_Config::organisms."/$phage/REFSEQ_ID";
		my $nc = <IN>;
		close IN;
	else {
		#print STDERR "No refseq ID was found for $phage so we use the phantome version\n";

foreach my $genome (keys %$ids) {
	#print STDERR $genome, "\t", join(", ", @{$ids->{$genome}}), "\n";
	my @ids = @{$ids->{$genome}};
	#if (scalar(@ids) == 1) {
		#my $url = $efetch_url. $ids[0];
		#getstore($url, "genbank_output/$ids.gbk");
	#else { }
	my $url = $efetch_url.join(",", @ids);
	getstore($url, "$outputdir/$ids[0].gbk");

system("tar zcf $dateoutput.tgz $dateoutput");

sub ncbisleep {
	my $lasttime=shift;
	while (time < $lasttime+2) {sleep 1}
	return time;


sub get_phantome_phage {
	my $phage = shift;
	next if (-e "$outputdir/$phage.gbk");
	my $cmd = "/home/fig/FIGdisk/FIG/bin/seed2genbank -g $phage -o $outputdir/$phage.gbk -t all -color phage";

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3