[Bio] / FortyEight / imp_salvage.pl Repository:
ViewVC logotype

Annotation of /FortyEight/imp_salvage.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : olson 1.1 #
2 :     # Salvage annotations for the organisms that are replacements of SEED genomes.
3 :     #
4 :     # This is true if the file REPLACES exists and containst the genome ID of an
5 :     # existing SEED genome.
6 :     #
7 :     # Note that for this to work the SEED that this script is invoked from has to
8 :     # have a current copy of the canonical SEED organism data.
9 :     #
10 :     # Salvage does the following:
11 :     #
12 :     # Determine the old->new mapping using make_peg_maps_from_fasta.
13 :     #
14 :     # Determine the set of pegs for which we will salvage assignments by finding
15 :     # the set of mapped pegs from the old organism that are in subsystems. We will
16 :     # carry across only those assignments; the others will be taken from the RAST
17 :     # pipeline. We will construct annotation file entries for the other mapped PEGs
18 :     # that have the old annotation data to keep the history, but they will be capped
19 :     # with the annotation for the RAST assignment.
20 :     #
21 :    
22 :    
23 :     use strict;
24 :     use FIG;
25 :     use Data::Dumper;
26 :     use FIG_Config;
27 :     use File::Copy;
28 :     use File::Basename;
29 :     use ImportJob;
30 :     use GenomeMeta;
31 :     use JobStage;
32 :    
33 :     @ARGV == 1 or die "Usage: $0 job-dir\n";
34 :    
35 :     my $fig = new FIG();
36 :    
37 :     my $max_hits = 300;
38 :    
39 :     my $jobdir = shift;
40 :    
41 :     -d $jobdir or die "$0: job dir $jobdir does not exist\n";
42 :    
43 :     my $stage = new JobStage('ImportJob', 'salvage', $jobdir);
44 :    
45 :     $stage or die "$0: Could not create job object";
46 :    
47 :     my $job = $stage->job;
48 :     my $job_id = $job->id;
49 :    
50 :     $stage->log("Running on " . $stage->hostname);
51 :    
52 :     $stage->set_status("running");
53 :     $stage->set_running("yes");
54 :    
55 :     $stage->set_qualified_metadata("host", $stage->hostname);
56 :    
57 :     #
58 :     #
59 :     # Begin
60 :     #
61 :     #
62 :    
63 :     open(JOBS, "<$jobdir/rast.jobs") or $stage->fatal("Cannot open $jobdir/rast.jobs: $!");
64 :    
65 :     while (my $rjdir = <JOBS>)
66 :     {
67 :     chomp $rjdir;
68 :    
69 :     my $rj = new Job48($rjdir);
70 :     my $repfile = $rj->orgdir() . "/REPLACES";
71 :    
72 :     my $repl = &FIG::file_head($repfile);
73 :     if ($repl)
74 :     {
75 :     chomp $repl;
76 :     do_salvage($rj, $repl);
77 :     }
78 :     }
79 :    
80 :     close(JOBS);
81 :    
82 :     sub do_salvage
83 :     {
84 :     my($job, $old_genome) = @_;
85 :    
86 :     print "Do replacement on " . $job->genome_name() . " from $old_genome\n";
87 :    
88 :     my $orgdir = $job->orgdir();
89 :    
90 :     #
91 :     # Figure out what the RAST assignments are. We do this in case a salvage
92 :     # is run multiple times.
93 :     # We stash a copy of the original RAST-generated data in rast.assigned_functions
94 :     # and rast.annotations if these files do not yet exist.
95 :     #
96 :    
97 :     if (! -f "$orgdir/rast.annotations" and -f "$orgdir/annotations")
98 :     {
99 :     copy("$orgdir/annotations", "$orgdir/rast.annotations") or
100 :     $stage->fatal("Cannot copy $orgdir/annotations to $orgdir/rast.annotations: $!");
101 :     }
102 :    
103 :     #
104 :     # We may not have yet built the assigned_functions from the proposed*functions.
105 :     #
106 :    
107 :     if (! -f "$orgdir/assigned_functions")
108 :     {
109 :     my $rc = system("cat $orgdir/proposed*functions > $orgdir/assigned_functions");
110 :     $rc == 0 or $stage->fatal("Cannot create $orgdir/assigned_functions from $orgdir/proposed*functions: rc=$rc");
111 :     }
112 :    
113 :     if (! -f "$orgdir/rast.assigned_functions" and -f "$orgdir/assigned_functions")
114 :     {
115 :     copy("$orgdir/assigned_functions", "$orgdir/rast.assigned_functions") or
116 :     $stage->fatal("Cannot copy $orgdir/assigned_functions to $orgdir/rast.assigned_functions: $!");
117 :     }
118 :    
119 :     #
120 :     # Assignments & annotations in place. Compute mappings.
121 :     #
122 :    
123 :     my $old_orgdir = "$FIG_Config::organisms/$old_genome";
124 :    
125 :     -d $old_orgdir or $stage->fatal("Old organism dir $old_orgdir does not exist");
126 :    
127 :     my $maps = "$orgdir/peg_maps";
128 :     #
129 :     # XXX - should rerun this every time, later.
130 :     if (! -f $maps)
131 :     {
132 :     $stage->run_process("make_peg_maps_from_fasta",
133 :     "$FIG_Config::bin/make_peg_maps_from_fasta",
134 :     $old_orgdir,
135 :     $orgdir,
136 :     $maps);
137 :     }
138 :    
139 :     #
140 :     # Ingest the map and mark each peg with its subsystem status.
141 :     #
142 :    
143 :     open(M, "<$maps") or $stage->fatal("Cannot open peg maps $maps: $!");
144 :     my @map;
145 :     my(%old_to_new, %new_to_old);
146 :     while (<M>)
147 :     {
148 :     chomp;
149 :     my($peg_old, $peg_new) = split(/\t/);
150 :     my $ss = [$fig->peg_to_subsystems($peg_old)];
151 :     my $ent = {
152 :     old => $peg_old,
153 :     new => $peg_new,
154 :     ss => $ss,
155 :     };
156 :    
157 :     push(@map, $ent);
158 :     $old_to_new{$peg_old} = $ent;
159 :     $new_to_old{$peg_new} = $ent;
160 :     }
161 :     close(M);
162 :    
163 :     #
164 :     # Given this map, we construct the new assigned_functions and annotations files.
165 :     #
166 :     # We start by copying rast.annotations to annotations, to get the initial history.
167 :     #
168 :     # Then we scan the old organism's annotations and assigned function files,
169 :     # remembering any of the pegs that show up in the map. Any that do not,
170 :     # we write to files unmapped.annotations and unmapped.assigned_functions, again
171 :     # to retain history.
172 :     #
173 :     # Once this scan is complete, we scan the new organism's assigned functions file.
174 :     # If a peg in there is mapped, we copy the set of annotations for the old
175 :     # peg across into the new annotations file, mapping pegs as we go.
176 :     #
177 :     # If the peg was in a subsystem, we then write
178 :     # an annotation declaring the set of subsystems the peg was in, and a final
179 :     # annotation with the function assignment from the old org. The old
180 :     # assignment is written to assigned_functions.
181 :     #
182 :     # If the peg is not in a subsystmem, we write a final annotation with the
183 :     # rast annotation, and write the rast assignemnt to assigned_functions.
184 :     #
185 :     # If the peg was not mapped, we just write the rast function to assigned_functions.
186 :     #
187 :     # Note that we don't actually have to scan anything - all we need to do is
188 :     # walk over the entries in the old/new map, and write out the appropriate data.
189 :     # Anything that isn't in there was already copied from the rast version of the data.
190 :     #
191 :    
192 :     my $new_af = $stage->open_file(">$orgdir/assigned_functions");
193 :     copy("$orgdir/rast.assigned_functions", $new_af) or
194 :     $stage->fatal("Cannot copy $orgdir/rast.assigned_functions to $orgdir/assigned_functions: $!");
195 :    
196 :     my $new_anno = $stage->open_file(">$orgdir/annotations");
197 :     copy("$orgdir/rast.annotations", $new_anno) or
198 :     $stage->fatal("Cannot copy $orgdir/rast.annotations to $orgdir/annotations: $!");
199 :    
200 :     my $unmapped_af = $stage->open_file(">$orgdir/unmapped.assigned_functions");
201 :     my $unmapped_anno = $stage->open_file(">$orgdir/unmapped.annotations");
202 :    
203 :     my $old_af = $stage->open_file("<$old_orgdir/assigned_functions");
204 :     my $old_anno = $stage->open_file("<$old_orgdir/annotations");
205 :    
206 :     my(%old_anno, %old_af);
207 :    
208 :     while (<$old_af>)
209 :     {
210 :     chomp;
211 :     my($peg, $fun) = split(/\t/);
212 :     if (my $ent = $old_to_new{$peg})
213 :     {
214 :     $ent->{old_func} = $fun;
215 :     }
216 :     else
217 :     {
218 :     print $unmapped_af "$_\n";
219 :     }
220 :     }
221 :     close($old_af);
222 :    
223 :     {
224 :     local $/;
225 :     $/ = "//\n";
226 :    
227 :     while (<$old_anno>)
228 :     {
229 :     chomp;
230 :     my($peg, $time, $who, $what, $val) = split(/\n/, $_, 5);
231 :    
232 :     if (my $ent = $old_to_new{$peg})
233 :     {
234 :     $val =~ s/\n*$//;
235 :     push @{$ent->{old_anno}}, [$peg, $time, $who, $what, $val];
236 :     }
237 :     else
238 :     {
239 :     print $unmapped_anno $ent . $/;
240 :     }
241 :     }
242 :     }
243 :     close($old_anno);
244 :    
245 :     #
246 :     # Pull in RAST assignments for use below
247 :     #
248 :     my %rast;
249 :     my $rast_af = $stage->open_file("$orgdir/rast.assigned_functions");
250 :     while (<$rast_af>)
251 :     {
252 :     chomp;
253 :     my($peg, $fn) = split(/\t/);
254 :     $rast{$peg} = $fn;
255 :     }
256 :     close($rast_af);
257 :    
258 :     for my $new_peg (sort { &FIG::by_fig_id($a, $b) } keys %new_to_old)
259 :     {
260 :     my $ent = $new_to_old{$new_peg};
261 :    
262 :     #
263 :     # Copy old annotations
264 :     #
265 :     for my $anno (@{$ent->{old_anno}})
266 :     {
267 :     $anno->[0] = $new_peg;
268 :     print $new_anno join("\n", @$anno), "\n//\n";
269 :     }
270 :    
271 :     my $old_func = $ent->{old_func};
272 :     my $new_func = $rast{$new_peg};
273 :    
274 :     #
275 :     # Determine if we need to update.
276 :     #
277 :     my @ss_list = @{$ent->{ss}};
278 :    
279 :     if ($old_func eq '')
280 :     {
281 :     print "$ent->{old} => $new_peg No existing function\n";
282 :     print $new_anno join("\n", $new_peg, time, "salvage",
283 :     "No function found in original organism $ent->{old}"), "\n//\n";
284 :    
285 :     }
286 :     elsif ($old_func eq $new_func)
287 :     {
288 :     print "$ent->{old} => $new_peg functions are the same\n";
289 :     print $new_anno join("\n", $new_peg, time, "salvage",
290 :     "Old and new assignments are the same", $old_func), "\n//\n";
291 :    
292 :     }
293 :     else
294 :     {
295 :     if (@ss_list > 0)
296 :     {
297 :     print "$ent->{old} => $new_peg is in a ss\n";
298 :    
299 :     print $new_anno join("\n", $new_peg, time, "salvage",
300 :     "Retaining old assignment due to membership in subsystems", "@ss_list", $old_func), "\n//\n";
301 :     print $new_anno join("\n", $new_peg, time, "Set master function to", $old_func), "\n//\n";
302 :     print $new_af "$new_peg\t$old_func\n";
303 :     }
304 :     else
305 :     {
306 :     print "$ent->{old} => $new_peg is not in a ss\n";
307 :    
308 :     print $new_anno join("\n", $new_peg, time, "salvage",
309 :     "Using RAST assignment due to no subsystem membership", $new_func), "\n//\n";
310 :     print $new_anno join("\n", $new_peg, time, "Set master function to", $new_func), "\n//\n";
311 :     }
312 :     }
313 :     }
314 :     close($new_af);
315 :     close($new_anno);
316 :     }
317 :    
318 :    
319 :    
320 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3