[Bio] / FigKernelScripts / make_release.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/make_release.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (view) (download) (as text)

1 : disz 1.1 use Data::Dumper;
2 :     use strict;
3 :     use SeedEnv;
4 :     use gjoseqlib;
5 :     use ReleaseConfig;
6 :    
7 :     my @time = localtime();
8 :     my $yyyy = $time[5] + 1900;
9 :     my $mm = $time[4] + 1;
10 : disz 1.2 if ($mm < 10) { $mm = '0' . $mm }
11 : disz 1.1
12 :     my $pseed_org = $ReleaseConfig::pseed_org;
13 :     my $coreseed_org = $ReleaseConfig::coreseed_org;
14 :     my $coreseed_sub = $ReleaseConfig::coreseed_sub;
15 :     my $base = $ReleaseConfig::base;
16 :     my $releases = $ReleaseConfig::releases;
17 :     my $subsystems = $ReleaseConfig::subsystems;
18 :    
19 :     (-s $subsystems) || die "you are missing $subsystems";
20 : disz 1.2
21 : olson 1.5 my $oldD = "$releases/.old";
22 :     my $newD = "$releases/.new";
23 :     my $problemSetD = "ProblemSets.$yyyy.$mm";
24 :     my $destD = "$releases/$problemSetD";
25 : disz 1.1 if (-d $newD)
26 :     {
27 : disz 1.2 &SeedUtils::run("rm -rf $newD");
28 : disz 1.1 }
29 :     mkdir($newD,0777);
30 :     mkdir("$newD/GenomeData",0777) || die "could not make $newD/GenomeData";
31 :     mkdir("$newD/SubsystemsData",0777) || die "could not make $newD/SubsystemsData";
32 :    
33 :     my $cs_funcs = {};
34 : olson 1.4 open(ALL_PEGS, "| gzip > $newD/all.faa.gz");
35 :     my($genome_names,$all_pegs) = &load_genomes("$newD/GenomeData",$coreseed_org,$pseed_org,$cs_funcs, \*ALL_PEGS);
36 :     close(ALL_PEGS);
37 : disz 1.2 &load_subsystems("$newD/SubsystemsData",$subsystems,$coreseed_sub,$cs_funcs,$genome_names,$coreseed_org,$all_pegs);
38 :    
39 :     if (-d $destD)
40 :     {
41 :     &SeedUtils::run("mv $destD $oldD");
42 :     }
43 :     &SeedUtils::run("mv $newD $destD");
44 : disz 1.1 &SeedUtils::run("pushd $releases; tar czf current_release.tgz notes* ProblemSets.$yyyy.$mm");
45 : olson 1.5 unlink("$releases/ProblemSets.current");
46 :     symlink($problemSetD, "$releases/ProblemSets.current");
47 : disz 1.2 if (-d $oldD)
48 :     {
49 :     &SeedUtils::run("rm -rf $oldD");
50 :     }
51 : disz 1.1 #&SeedUtils::run("pushd /homes/overbeek/Ross/AnnotationDataSite/Releases; tar czf current_release.tgz notes* ProblemSets.$yyyy.$mm");
52 :    
53 :    
54 :     sub load_genomes {
55 : olson 1.4 my($genomeD,$corseseed_org,$pseed_org,$cs_funcs, $unified_output_fh) = @_;
56 : disz 1.1
57 :     $ENV{'SAS_SERVER'} = 'PSEED';
58 :     my @tmp = grep { $_->[0] !~ /phage|plasmid|virus/i } map { chomp; [split(/\t/,$_)] } `svr_all_genomes -complete -prokaryotic`;
59 :     # $#tmp = 100;
60 :     my @genomes = map { $_->[1] } sort { $a <=> $b } @tmp;
61 :     my %genome_names = map { ($_->[1] => $_->[0]) } @tmp;
62 :    
63 : disz 1.2 my $all_pegs = {};
64 : disz 1.1 foreach my $g (@genomes)
65 :     {
66 :     my $genD = "$genomeD/$g";
67 :     mkdir($genD,0777) || die "could not make $genomeD/$g";
68 :     my $type = (-d "$coreseed_org/$g") ? 'c' : 'p';
69 :     open(TYPE,">$genD/type") || die "could not open $genD/type";
70 :     print TYPE "$type\n";
71 :     close(TYPE);
72 :     &SeedUtils::run("cp $pseed_org/$g/contigs $genD/contigs");
73 :     if ($type eq 'c')
74 :     {
75 :     # print STDERR "$g is a coreSEED genome\n";
76 : olson 1.4 &copy_peg_data($genD,"$coreseed_org/$g",$cs_funcs,$all_pegs, $unified_output_fh);
77 : disz 1.1 }
78 :     else
79 :     {
80 : olson 1.4 &copy_peg_data($genD,"$pseed_org/$g",undef,$all_pegs, $unified_output_fh);
81 : disz 1.1 }
82 :     }
83 : disz 1.2 return (\%genome_names,$all_pegs);
84 : disz 1.1 }
85 :    
86 :     sub copy_peg_data {
87 : olson 1.4 my($toD,$fromD,$cs_funcs,$all_pegs, $unified_output_fh) = @_;
88 :    
89 :     my $gnf;
90 :     my $gname;
91 :     if (open($gnf, "<", "$fromD/GENOME"))
92 :     {
93 :     $gname = <$gnf>;
94 :     chomp $gname;
95 :     close($gnf);
96 :     }
97 : disz 1.1
98 :     my %function_of;
99 :     foreach $_ (`cat $fromD/assigned_functions`)
100 :     {
101 :     if ($_ =~ /^(fig\|\d+\.\d+\.peg\.\d+)\t(\S[^\t]*\S)/)
102 :     {
103 :     $function_of{$1} = $2;
104 :     }
105 :     }
106 :    
107 :     my %loc_of;
108 :     foreach $_ (`cat $fromD/Features/peg/tbl`)
109 :     {
110 :     if ($_ =~ /^(fig\|\d+\.\d+\.peg\.\d+)\t(\S[^\t]*\S)/)
111 :     {
112 :     my $peg = $1;
113 :     my $locs = $2;
114 :     my @locs = split(/,/,$locs);
115 :     my @new_locs = map { &to_new($_) } @locs;
116 :     $loc_of{$peg} = join(",",@new_locs);
117 :     }
118 :     }
119 :    
120 :     my %tran_of;
121 :     my @tuples = &gjoseqlib::read_fasta("$fromD/Features/peg/fasta");
122 :     foreach $_ (@tuples)
123 :     {
124 :     my($peg,undef,$seq) = @$_;
125 :     $tran_of{$peg} = $seq;
126 :     }
127 :     open(INFO,">$toD/peg-info") || die "could not open $toD/peg-info";
128 :     open(TRANS,">$toD/peg-trans") || die "could not open $toD/peg-trans";
129 : olson 1.4 foreach my $peg (sort { &SeedUtils::by_fig_id($a, $b) } keys(%loc_of))
130 : disz 1.1 {
131 :     my $f = $function_of{$peg};
132 :     if (! $f) { $f = "hypothetical protein" }
133 :     if ($cs_funcs) { $cs_funcs->{$peg} = $f }
134 :     my $loc = $loc_of{$peg};
135 :     my $tran = $tran_of{$peg};
136 : disz 1.2 if ($loc && $tran)
137 :     {
138 :     $all_pegs->{$peg} = 1;
139 :     print INFO join("\t",($peg,$loc,$f)),"\n";
140 : olson 1.4 my $info = "$f [$gname]";
141 :     gjoseqlib::print_alignment_as_fasta(\*TRANS, [$peg, $info, $tran]);
142 :     gjoseqlib::print_alignment_as_fasta($unified_output_fh, [$peg, $info, $tran]);
143 :     # print TRANS ">$peg\n$tran\n";
144 : disz 1.2 }
145 : disz 1.1 }
146 :     close(INFO);
147 :     close(TRANS);
148 :     }
149 :    
150 :     sub to_new {
151 :     my($old) = @_;
152 :    
153 :     if ($old =~ /^(\S+)_(\d+)_(\d+)$/)
154 :     {
155 :     my $contig = $1;
156 :     my $from = $2;
157 :     my $to = $3;
158 :     if ($from <= $to)
159 :     {
160 : overbeek 1.3 my $len = ($to - $from) + 1;
161 : disz 1.1 return $contig . "_" . $from . "+" . $len;
162 :     }
163 :     else
164 :     {
165 :     my $len = ($from - $to) + 1;
166 :     return $contig . "_" . $from . "-" . $len;
167 :     }
168 :     }
169 :     return undef;
170 :     }
171 :    
172 :     sub load_subsystems {
173 : disz 1.2 my($subsysD,$subsystems,$seed_subsys,$cs_funcs,$genome_names,$coreseed_org,$all_pegs) = @_;
174 : disz 1.1
175 :     my @subsystems = map { chomp; $_ =~ s/ /_/g; $_ } `cat $subsystems`;
176 :     foreach my $ss (map { chomp; $_ } @subsystems)
177 :     {
178 :     $ss =~ s/ /_/g;
179 :     if (-d "$seed_subsys/$ss")
180 :     {
181 :     # print STDERR "processing $ss\n";
182 :     my $new_ssD = "$subsysD/$ss";
183 :     mkdir($new_ssD,0777) || die "could not make $new_ssD";
184 :     my @subsys = `cat \'$seed_subsys/$ss/spreadsheet\'`;
185 :     # print STDERR "got spreadsheet\n";
186 :     open(ROLES,">$new_ssD/Roles") || die "could not open $new_ssD/Roles";
187 :     open(GENOMES,">$new_ssD/GenomesInSubsys") || die "could not open $new_ssD/GenomesInSubsys";
188 :     open(PEGS,">$new_ssD/PegsInSubsys") || die "could not open $new_ssD/PegsInSubsys";
189 :    
190 :     my $x;
191 :     while (($x = shift @subsys) && ($x !~ /^\/\//))
192 :     {
193 :     if ($x =~ /^\S+\t(\S.*\S)/)
194 :     {
195 :     print ROLES $1 . "\n";
196 :     }
197 :     }
198 :     close(ROLES);
199 :     # print STDERR "got roles\n";
200 :     while (($x = shift @subsys) && ($x !~ /^\/\//)) {}
201 :     my %pegH;
202 :     while (($x = shift @subsys) && ($x !~ /^\/\//))
203 :     {
204 :     chomp $x;
205 :     my($g,$v,@pegNs) = split(/\t/,$x);
206 :     next if (($v =~ /^\*/) || (! $genome_names->{$g}));
207 : disz 1.2 next if (! -d "$coreseed_org/$g"); ### take subsystems only from coreSEED
208 : disz 1.1 print GENOMES join("\t",($g,$genome_names->{$g},$v)),"\n";
209 :     my @tmp = map { ($_ =~ /^(\S+)/) ? split(/,/,$_) : () } @pegNs;
210 :     foreach $_ (@tmp)
211 :     {
212 :     my $peg = "fig|$g.peg.$_";
213 : disz 1.2 if ($all_pegs->{$peg})
214 :     {
215 :     my $f = $cs_funcs->{$peg};
216 :     $pegH{$peg} = $f;
217 :     }
218 :     else
219 :     {
220 :     print STDERR "Subsystem $ss contains invalid peg $peg\n";
221 :     }
222 : disz 1.1 }
223 :     }
224 :     close(GENOMES);
225 :     foreach my $peg (sort { $pegH{$a} cmp $pegH{$b} } keys(%pegH))
226 :     {
227 :     print PEGS join("\t",($peg,$pegH{$peg})),"\n";
228 :     }
229 :     close(PEGS);
230 :     }
231 :     else
232 :     {
233 :     print STDERR "could not find subsystem $seed_subsys/$ss\n";
234 :     }
235 :     }
236 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3