[Bio] / FigKernelScripts / export_phage_data.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/export_phage_data.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : redwards 1.1 #__perl__
2 :    
3 :     =pod
4 :    
5 :     This script exports the data from the phage genomes (only) into a download directory. The directory is currently hard coded to the one that Rob uses. The script creates subdirectories named after the current date and exports every phage genome as FASTA (DNA and protein), GenBank, and GFF3 files.
6 :    
7 :     This may well be run as a cron job!
8 :    
9 :     =cut
10 :    
use strict;
use warnings;
use FIG;
use Phage;

# Handles onto the SEED data (FIG) and onto the list of phage genomes (Phage).
my $fig   = FIG->new;
my $phage = Phage->new;

# Root of the download area. Hard coded: this only makes sense on the
# phantome web server, so bail out early anywhere else.
my $destdir = "/var/www/phantome/Downloads";
unless (-d $destdir) {
    die "FATAL: $destdir does not exist. Are you running this on a machine that is not phantome?";
}

# figure out the date
#
# $timestamp (epoch seconds) names the aggregate FASTA files; $date
# (YEAR-MONTH-DAY, deliberately not zero padded so that existing directory
# names stay stable) names the per-run subdirectories.
my $timestamp = time;
my ($mday, $mon, $year) = (localtime($timestamp))[3, 4, 5];
$mon++;            # localtime months are 0-based
$year += 1900;     # localtime years are offsets from 1900

my $date = $year."-".$mon."-".$mday;

# make the directory structure for the outputs
#
# Something like:
#
# /var/www/phantome/Downloads/
# DNA/all_sequences
# DNA/by_genome/DATE
#
# proteins/all_sequences
# proteins/by_genome/DATE
#
# genomes/genbank/DATE
# genomes/gff3/DATE
#
# Only the DATE level is created here; the parent directories must already
# exist. A directory left over from an earlier run on the same day is reused.
foreach my $dated_dir (
    "$destdir/DNA/by_genome/$date",
    "$destdir/proteins/by_genome/$date",
    "$destdir/genomes/genbank/$date",
    "$destdir/genomes/gff3/$date",
) {
    next if -d $dated_dir;
    mkdir($dated_dir, 0755) || die "FATAL: can't create $dated_dir: $!";
}

# open the generic files that we'll need
#
# CONTIGS and PROTEINS stay bareword handles because the export loop further
# down the script prints to them by name.
my $all_contigs  = "$destdir/DNA/all_sequences/phage_contigs_$timestamp.fasta";
my $all_proteins = "$destdir/proteins/all_sequences/phage_proteins_$timestamp.fasta";
open(CONTIGS, '>', $all_contigs) || die "Can't open contigs $all_contigs: $!";
open(PROTEINS, '>', $all_proteins) || die "can't open all proteins $all_proteins: $!";
52 :    
53 :    
# Start one SEED export tool (seed2genbank / seed2gff) as a background job.
#
# Arguments: the FIG object, the path of the tool, and the tool's command
# line arguments. The command is run in list form, so no shell is involved.
# Returns whatever $fig->run_in_background() returns, which the rest of this
# script treats as a child PID (NOTE(review): confirm against FIG.pm).
sub _start_exporter {
    my ($fig, $tool, @args) = @_;
    my @cmd = ($tool, @args);
    my $pid = $fig->run_in_background(sub {
        my $status = system(@cmd);
        warn "FAILED (status $status): @cmd\n" if $status != 0;
        return $status;
    });
    print STDERR "Started @cmd with PID: $pid\n";
    return $pid;
}

# iterate through the phages and export all the data:
my @children;

# Counts the genomes processed so far. It must live outside the loop:
# declared inside, it was reset on every pass and the throttle at the bottom
# of the loop never fired.
my $genome_count = 0;

foreach my $genome ($phage->phages()) {
    my $gs = $fig->genus_species($genome);
    print STDERR "$gs ($genome)\n";

    # open the genome file
    my $dna_file = "$destdir/DNA/by_genome/$date/$genome.fasta";
    open(my $dna_fh, '>', $dna_file) || die "can't open $dna_file: $!";
    # process the contigs: each one goes to the per-genome file and to the
    # aggregate CONTIGS file
    foreach my $contig ($fig->contigs_of($genome)) {
        my $end = $fig->contig_ln($genome, $contig);
        my $dnaseq = $fig->dna_seq($genome, ($contig."_1_".$end));
        print CONTIGS ">$contig [$genome] [$gs]\n$dnaseq\n";
        print {$dna_fh} ">$contig\n$dnaseq\n";
    }
    # buffered write errors only surface at close
    close($dna_fh) || die "error writing $dna_file: $!";

    my $prot_file = "$destdir/proteins/by_genome/$date/$genome.fasta";
    open(my $prot_fh, '>', $prot_file) || die "can't open $prot_file: $!";
    # process the proteins: each one goes to the per-genome file and to the
    # aggregate PROTEINS file
    foreach my $peg ($fig->pegs_of($genome)) {
        my $trans = $fig->get_translation($peg);
        my $fn = scalar($fig->function_of($peg));
        $fn = '' unless defined $fn;    # unannotated peg: same output, no warning
        print {$prot_fh} ">$peg [$fn]\n$trans\n";
        print PROTEINS ">$peg [$fn] [$genome] [$gs]\n$trans\n";
    }
    close($prot_fh) || die "error writing $prot_file: $!";

    # finally, make the gff and genbank files
    # for the gbk files: seed2genbank -g $i -o $i.gbk -t all -color phage
    push @children, _start_exporter(
        $fig, '/home/fig/FIGdisk/FIG/bin/seed2genbank',
        '-g', $genome, '-t', 'all', '-color', 'phage',
        '-o', "$destdir/genomes/genbank/$date/$genome.gbk",
    );

    # for the gff3 files
    push @children, _start_exporter(
        $fig, '/home/fig/FIGdisk/FIG/bin/seed2gff',
        '-g', $genome, '-t', 'all',
        '-o', "$destdir/genomes/gff3/$date/$genome.gff3",
    );

    # this is just to sleep and let the children catch up!
    $genome_count++;
    sleep 5 unless $genome_count % 5;
}

# The aggregate files are complete now. Only warn on failure: the per-genome
# outputs are still valid and the background exporters are still running.
close(CONTIGS)  || warn "error closing the all-contigs file: $!";
close(PROTEINS) || warn "error closing the all-proteins file: $!";
103 :    
# now we have to wait for the children to die before we can make tar archives
# (the genbank and gff3 directories are filled in by those children)
print STDERR "waiting for children to finish\n";
my $t = time;
foreach my $child (@children) {
    waitpid($child, 0);
}
print STDERR "Waiting took ", (time - $t), " seconds \n";

# Archive each dated directory as DATE.tgz next to it. Every entry ends in a
# slash, so "$dir$date.tgz" is the archive path inside that directory.
my @tarChildren;
my @directories = ("$destdir/DNA/by_genome/", "$destdir/proteins/by_genome/", "$destdir/genomes/gff3/", "$destdir/genomes/genbank/");

foreach my $dir (@directories) {
    # "tar -C $dir" gives the same archive members (DATE/...) as changing
    # into $dir first, but it cannot silently archive the wrong place when
    # the directory is unreachable: tar simply fails and we report it.
    my @tar = ('tar', '-zcf', "$dir$date.tgz", '-C', $dir, $date);
    my $pid = $fig->run_in_background(sub {
        my $status = system(@tar);
        warn "FAILED (status $status): @tar\n" if $status != 0;
        return $status;
    });
    push @tarChildren, $pid;
}

print STDERR "waiting for tars to finish \n";
$t = time;
foreach my $child (@tarChildren) {
    waitpid($child, 0);
}
print STDERR "Waiting took ", (time - $t), " seconds \n";
126 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3