[Bio] / FigKernelScripts / export_all_viruses.pl Repository:
ViewVC logotype

View of /FigKernelScripts/export_all_viruses.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Fri May 3 17:29:19 2013 UTC (6 years, 6 months ago) by redwards
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
 a set of scripts for adding phage genomes to the seed

#__perl__

use strict; 
use LWP::Simple qw/get getstore/;
use Data::Dumper;
use FIG;
my $fig=new FIG;
use Phage;
my $phantome=new Phage;


# Get the virus data from genbank and make a single file of it
# the virus data is here: http://www.ncbi.nlm.nih.gov/genomes/GenomesGroup.cgi?taxid=10239&opt=Virus&sort=genome
# and this can be acquired as a tab separated text file.
#
# At the moment we download everything to a directory and then at the end we tar that up. We can also run it as a single file, I suppose

my @date=localtime(time);
$date[5]+=1900;
$date[4]++;

my $baseoutput = "/var/www/phantome/Downloads/Viruses/";
my $dateoutput = sprintf("%04d_%02d_%02d", $date[5], $date[4], $date[3]);
my $outputdir = $baseoutput.$dateoutput;
unless (-e $outputdir) {mkdir($outputdir, 0755) || die "can't make $outputdir"}

my $virus_url = 'http://www.ncbi.nlm.nih.gov/genomes/GenomesGroup.cgi?opt=virus&taxid=10239&sort=Genome&cmd=download';
my $efetch_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&retmode=text&rettype=gb&tool=rob_virus&email=redwards@salmonella.org&id=';

my $lasttime=time;

open(IN, "<", \get($virus_url)) || die "can't open pipe";

my $current;
my $ids;
while (<IN>) {
	next if (/^\#/);
	if (/^\s+/) {
		while (s/(NC_\d+)//) {push @{$ids->{$current}}, $1}
		next;
	}
	my @a=split /\t/;
	if ($a[0]) {$current=$a[0]}
	if ($a[1] =~ /NC/) {push @{$ids->{$current}}, $a[1]}
}
close IN;

# if it is a phage genome, we should have a better annotation, so use that
# get the current refseq ids
my %phageID;
foreach my $phage ($phantome->phages()) {
	if (-e $FIG_Config::organisms."/$phage/REFSEQ_ID") {
		open(IN, $FIG_Config::organisms."/$phage/REFSEQ_ID") || die "can't open ".$FIG_Config::organisms."/$phage/REFSEQ_ID";
		my $nc = <IN>;
		chomp($nc);
		close IN;
		$phageID{$nc}=$phage;
	}
	else {
		#print STDERR "No refseq ID was found for $phage so we use the phantome version\n";
		&get_phantome_phage($phage);
	}
}

foreach my $genome (keys %$ids) {
	#print STDERR $genome, "\t", join(", ", @{$ids->{$genome}}), "\n";
	my @ids = @{$ids->{$genome}};
	#if (scalar(@ids) == 1) {
		#my $url = $efetch_url. $ids[0];
		#getstore($url, "genbank_output/$ids.gbk");
	#}
	#else { }
	my $url = $efetch_url.join(",", @ids);
	$lasttime=&ncbisleep($lasttime);
	getstore($url, "$outputdir/$ids[0].gbk");
}

chdir($baseoutput);
system("tar zcf $dateoutput.tgz $dateoutput");
exit(0);

sub ncbisleep {
	my $lasttime=shift;
	while (time < $lasttime+2) {sleep 1}
	return time;
}

	


sub get_phantome_phage {
	my $phage = shift;
	next if (-e "$outputdir/$phage.gbk");
	my $cmd = "/home/fig/FIGdisk/FIG/bin/seed2genbank -g $phage -o $outputdir/$phage.gbk -t all -color phage";
	system($cmd);
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3