[Bio] / FortyEight / imp_salvage.pl Repository:
ViewVC logotype

Diff of /FortyEight/imp_salvage.pl

Parent Directory | Revision Log | View Patch

revision 1.1, Thu Sep 13 19:59:18 2007 UTC | revision 1.2, Tue Sep 25 01:14:57 2007 UTC
# Line 29  Line 29 
29  use ImportJob;  use ImportJob;
30  use GenomeMeta;  use GenomeMeta;
31  use JobStage;  use JobStage;
32    use POSIX;
33    
34  @ARGV == 1 or die "Usage: $0 job-dir\n";  @ARGV == 1 or die "Usage: $0 job-dir\n";
35    
# Line 67  Line 68 
68      chomp $rjdir;      chomp $rjdir;
69    
70      my $rj = new Job48($rjdir);      my $rj = new Job48($rjdir);
71      my $repfile = $rj->orgdir() . "/REPLACES";      my $rj_id = $rj->id;
72        my $orgdir = $rj->orgdir();
73        my $repfile = "$orgdir/REPLACES";
74    
75      my $repl = &FIG::file_head($repfile);      my $repl = &FIG::file_head($repfile);
76    
77        my $salvage_msg;
78    
79      if ($repl)      if ($repl)
80      {      {
81          chomp $repl;          chomp $repl;
82          do_salvage($rj, $repl);          my $n = do_salvage($rj, $repl);
     }  
 }  
83    
84  close(JOBS);          $salvage_msg = "$n function assignments salvaged from $repl " . $fig->genus_species($repl);
85        }
86  sub do_salvage      else
87  {  {
     my($job, $old_genome) = @_;  
   
     print "Do replacement on " . $job->genome_name()  . " from $old_genome\n";  
   
     my $orgdir = $job->orgdir();  
   
88      #      #
89      # Figure out what the RAST assignments are. We do this in case a salvage          # We are not salvaging, but we need to do a little cleanup to make the two cases the same.
90      # is run multiple times.          #
91      # We stash a copy of the original RAST-generated data in rast.assigned_functions          # Create imp_assigned_functions from the set of *_functions files we have, and copy
92      # and rast.annotations if these files do not yet exist.          # the annotations over to imp_annotations.
93      #      #
94    
95      if (! -f "$orgdir/rast.annotations" and -f "$orgdir/annotations")          my $imp_af = $stage->open_file(">$orgdir/imp_assigned_functions");
96            for my $f (qw(assigned_functions proposed_non_ff_functions proposed_functions))
97            {
98                my $path = "$orgdir/$f";
99    
100                if (open(AF, "<$path"))
101      {      {
102          copy("$orgdir/annotations", "$orgdir/rast.annotations") or                  while (<AF>)
103              $stage->fatal("Cannot copy $orgdir/annotations to $orgdir/rast.annotations: $!");                  {
104                        print $imp_af $_;
105                    }
106                    close(AF);
107                }
108            }
109            close($imp_af);
110    
111            if (-f "$orgdir/annotations")
112            {
113                copy("$orgdir/annotations", "$orgdir/imp_annotations") or
114                    $stage->fatal("Cannot copy $orgdir/annotations to $orgdir/imp_annotations: $!");
115            }
116      }      }
117    
118      #      #
119      # We may not have yet built the assigned_functions from the proposed*functions.      # While we're here, we're going to also mark this genome directory as a RAST job.
120      #      #
121    
122      if (! -f "$orgdir/assigned_functions")      my $fh = $stage->open_file(">$orgdir/RAST");
123      {      my $submit_time = ctime($rj->meta->get_metadata("upload.timestamp"));
124          my $rc = system("cat $orgdir/proposed*functions > $orgdir/assigned_functions");      my $dtime = (stat("$rjdir/DONE"))[9];
125          $rc == 0 or $stage->fatal("Cannot create $orgdir/assigned_functions from $orgdir/proposed*functions: rc=$rc");      my $finish_time = ctime($dtime);
126        my $import_time = ctime(time);
127        print $fh "Genome processed by RAST at $FIG_Config::fig\n";
128        print $fh "$salvage_msg\n" if $salvage_msg;
129        print $fh "RAST job number $rj_id from $rjdir\n";
130        print $fh "Upload at: $submit_time";
131        print $fh "Completion at: $finish_time";
132        print $fh "Import processing at: $import_time";
133        close($fh);
134    
135      }      }
136    
137      if (! -f "$orgdir/rast.assigned_functions" and -f "$orgdir/assigned_functions")  close(JOBS);
138    
139    sub do_salvage
140      {      {
141          copy("$orgdir/assigned_functions", "$orgdir/rast.assigned_functions") or      my($job, $old_genome) = @_;
142              $stage->fatal("Cannot copy $orgdir/assigned_functions to $orgdir/rast.assigned_functions: $!");  
143      }      print "Do replacement on " . $job->genome_name()  . " from $old_genome\n";
144    
145        my $orgdir = $job->orgdir();
146    
147      #      #
148      # Assignments & annotations in place. Compute mappings.      # Compute mappings.
149      #      #
150    
151        my $n_salvaged = 0;
152    
153      my $old_orgdir = "$FIG_Config::organisms/$old_genome";      my $old_orgdir = "$FIG_Config::organisms/$old_genome";
154    
155      -d $old_orgdir or $stage->fatal("Old organism dir $old_orgdir does not exist");      -d $old_orgdir or $stage->fatal("Old organism dir $old_orgdir does not exist");
# Line 163  Line 193 
193      #      #
194      # Given this map, we construct the new assigned_functions and annotations files.      # Given this map, we construct the new assigned_functions and annotations files.
195      #      #
196      # We start by copying rast.annotations to annotations, to get the initial history.      # We start by copying annotations to imp_annotations, to get the initial history.
197      #      #
198      # Then we scan the old organism's annotations and assigned function files,      # Then we scan the old organism's annotations and assigned function files,
199      # remembering any of the pegs that show up in the map. Any that do not,      # remembering any of the pegs that show up in the map. Any that do not,
# Line 177  Line 207 
207      # If the peg was in a subsystem, we then write      # If the peg was in a subsystem, we then write
208      # an annotation declaring the set of subsystems the peg was in, and a final      # an annotation declaring the set of subsystems the peg was in, and a final
209      # annotation with the function assignment from the old org. The old      # annotation with the function assignment from the old org. The old
210      # assignment is written to assigned_functions.      # assignment is written to imp_assigned_functions.
211      #      #
212      # If the peg is not in a subsystmem, we write a final annotation with the      # If the peg is not in a subsystmem, we write a final annotation with the
213      # rast annotation, and write the rast assignemnt to assigned_functions.      # rast annotation, and write the rast assignment to imp_assigned_functions.
214      #      #
215      # If the peg was not mapped, we just write the rast function to assigned_functions.      # If the peg was not mapped, we just write the rast function to imp_assigned_functions.
216      #      #
217      # Note that we don't actually have to scan anything - all we need to do is      # Note that we don't actually have to scan anything - all we need to do is
218      # walk over the entries in the old/new map, and write out the appropriate data.      # walk over the entries in the old/new map, and write out the appropriate data.
219      # Anything that isn't in there was already copied from the rast version of the data.      # Anything that isn't in there was already copied from the rast version of the data.
220      #      #
221    
222      my $new_af = $stage->open_file(">$orgdir/assigned_functions");      my $new_af = $stage->open_file(">$orgdir/imp_assigned_functions");
223      copy("$orgdir/rast.assigned_functions", $new_af) or  
224          $stage->fatal("Cannot copy $orgdir/rast.assigned_functions to $orgdir/assigned_functions: $!");      #
225        # Read the RAST assigned functions files, write a single large file with
226      my $new_anno = $stage->open_file(">$orgdir/annotations");      # all the data in it, and pull the assignments into the %rast hash as well.
227      copy("$orgdir/rast.annotations", $new_anno) or      #
228          $stage->fatal("Cannot copy $orgdir/rast.annotations to $orgdir/annotations: $!");  
229        my %rast;
230        my $imp_af = $stage->open_file(">$orgdir/imp_assigned_functions");
231        for my $f (qw(assigned_functions proposed_non_ff_functions proposed_functions))
232        {
233            my $path = "$orgdir/$f";
234    
235            if (open(AF, "<$path"))
236            {
237                while (<AF>)
238                {
239                    print $imp_af $_;
240                    chomp;
241                    my($peg, $fn) = split(/\t/);
242                    $rast{$peg} = $fn;
243                }
244                close(AF);
245            }
246        }
247        close($imp_af);
248    
249        #
250        # Copy annotations to imp_annotations to initialize it; leave the
251        # filehandle open to add more later on.
252        #
253        my $new_anno = $stage->open_file(">$orgdir/imp_annotations");
254        my $orig_anno = $stage->open_file("<$orgdir/annotations");
255        my $buf;
256        while (read($orig_anno, $buf, 4096))
257        {
258            print $new_anno $buf;
259        }
260        close($orig_anno);
261    
262      my $unmapped_af = $stage->open_file(">$orgdir/unmapped.assigned_functions");      my $unmapped_af = $stage->open_file(">$orgdir/unmapped.assigned_functions");
263      my $unmapped_anno = $stage->open_file(">$orgdir/unmapped.annotations");      my $unmapped_anno = $stage->open_file(">$orgdir/unmapped.annotations");
# Line 205  Line 267 
267    
268      my(%old_anno, %old_af);      my(%old_anno, %old_af);
269    
270        #
271        # Scan the SEED annotations and assigned functions files and ingest.
272        #
273    
274      while (<$old_af>)      while (<$old_af>)
275      {      {
276          chomp;          chomp;
# Line 242  Line 308 
308      }      }
309      close($old_anno);      close($old_anno);
310    
     #  
     # Pull in RAST assignments for use below  
     #  
     my %rast;  
     my $rast_af = $stage->open_file("$orgdir/rast.assigned_functions");  
     while (<$rast_af>)  
     {  
         chomp;  
         my($peg, $fn) = split(/\t/);  
         $rast{$peg} = $fn;  
     }  
     close($rast_af);  
   
311      for my $new_peg (sort { &FIG::by_fig_id($a, $b) }  keys %new_to_old)      for my $new_peg (sort { &FIG::by_fig_id($a, $b) }  keys %new_to_old)
312      {      {
313          my $ent = $new_to_old{$new_peg};          my $ent = $new_to_old{$new_peg};
# Line 298  Line 351 
351    
352                  print $new_anno join("\n", $new_peg, time, "salvage",                  print $new_anno join("\n", $new_peg, time, "salvage",
353                                       "Retaining old assignment due to membership in subsystems", "@ss_list", $old_func), "\n//\n";                                       "Retaining old assignment due to membership in subsystems", "@ss_list", $old_func), "\n//\n";
354                  print $new_anno join("\n", $new_peg, time, "Set master function to", $old_func), "\n//\n";                  print $new_anno join("\n", $new_peg, time, "salvage", "Set master function to", $old_func), "\n//\n";
355                  print $new_af "$new_peg\t$old_func\n";                  print $new_af "$new_peg\t$old_func\n";
356    
357                    $n_salvaged++;
358              }              }
359              else              else
360              {              {
# Line 307  Line 362 
362    
363                  print $new_anno join("\n", $new_peg, time, "salvage",                  print $new_anno join("\n", $new_peg, time, "salvage",
364                                       "Using RAST assignment due to no subsystem membership", $new_func), "\n//\n";                                       "Using RAST assignment due to no subsystem membership", $new_func), "\n//\n";
365                  print $new_anno join("\n", $new_peg, time, "Set master function to", $new_func), "\n//\n";                  print $new_anno join("\n", $new_peg, time, "salvage", "Set master function to", $new_func), "\n//\n";
366              }              }
367          }          }
368      }      }
369      close($new_af);      close($new_af);
370      close($new_anno);      close($new_anno);
371    
372        return $n_salvaged;
373  }  }
374    
375    

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.2

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3