[Bio] / FigKernelPackages / P2Pupdate.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/P2Pupdate.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.39 - (view) (download) (as text)

1 : efrank 1.1 package P2Pupdate;
2 :    
3 : olson 1.33 use strict;
4 :    
5 : efrank 1.1 use FIG_Config;
6 :     use FIG;
7 :     use Carp;
8 :     use Data::Dumper;
9 :     use Cwd;
10 :    
11 :     =pod
12 :    
13 :     =head1 updating code
14 :    
15 :     This routine packages what is needed to upgrade an older system to the
16 :     current code. Code releases are numered
17 :    
18 :     p1n1.p2n2.p3n3...
19 :    
20 :     where "." is added at the point the code moved to another branch of
21 :     the tree. FIG, who provided the initial release of the SEED, will
22 :     number all of their code releases as
23 :    
24 :     FIGn
25 :    
26 :     where n is an integer. Suppose that between releases 13 and 14 a
27 :     second group (which we will term "Idiots" for convenience) took
28 :     release 13 and wished to branch the code tree. At that point, they
29 :     would name their first release as
30 :    
31 :     FIG13.Idiots1
32 :    
33 :     We are, of course, being both cavalier and nasty when we make such a
34 :     reference. We do, however, wish to express the view that it will
35 :     benefit everyone to attempt to reconcile differences and maintain a
36 :     single code progression as long as possible. There are often good
37 :     reasons to part ways, but we urge people to think carefully before
38 :     taking such a step.
39 :    
40 :     Two code releases
41 :    
42 :     i1.i2.i3...in
43 :     and j1.j2.j3...jm with m <= n
44 :    
45 :     are compatible iff
46 :    
47 :     ip == jp for p < m, and
48 :     jm and im have the same "source" and
49 :     jm <= im
50 :    
51 :     A new code release must have the property that it can bring any
52 :     "older" compatible release up to its release.
53 :    
54 :     Note that there is an issue relating to the code to build/install packages.
55 :     Since a system may be radically restructured between releases of code, the
56 :     code to build a "package" and the code to "install" a package are radically
57 :     separated. For example, the code in P2Pupdate.pm for building an assignment
58 :     package and the code for installing an assignment package both apply to the
59 :     release of code current on the system containing P2Pupdate.pm. In fact, the
60 :     code releases may be quite different on two synchronizing systems.
61 :    
62 :     To make things work the following rules must be observed:
63 :    
64 :     1. a code release is a tar file containing VERSION, Packages,
65 :     bin/ToolTemplates, and CGI/ToolTemplates. The installing system needs
66 :     to place these at the appropriate spots, and then run bring_system_up_to_date,
67 :     which is supposed to do any required restructuring.
68 :    
69 :     2. an assignments package is a tar file containing a single directory. The directory
70 :     contains subdirectories -- one per genome. Each genome subdirectory contains zero
71 :     or more files. The name of the file is the "user" and the contents will be the
72 :     assignments made by that user.
73 :    
74 :     3. an annotations package is a tar file containing a single directory. The files in
75 :     the directory are named by genome. They contain the annotations for the genome.
76 :    
77 :     =cut
78 :    
79 :     =pod
80 :    
81 :     =head1 what_code_do_I_have
82 :    
83 :     usage: &what_code_do_I_have($fig_base)
84 :    
85 :     This just returns the current version of the code.
86 :    
87 :     =cut
88 :    
89 :     sub what_code_do_I_have {
90 :     my($fig_base) = @_;
91 :    
92 : olson 1.34 my $version = &FIG::file_read("$fig_base/VERSION");
93 :     chomp $version;
94 : efrank 1.1 return $version;
95 :     }
96 :    
97 :     =pod
98 :    
99 :     =head1 updatable_code
100 :    
101 :     usage: &updatable_code_code($v1,$v2)
102 :    
103 :     This just returns true iff the two versions of code are compatible and $v1
104 :     is "more recent".
105 :    
106 :     =cut
107 :    
108 :     sub updatable_code {
109 :     my($v1,$v2) = @_;
110 :     my($i,$v1p,$v1n,$v2p,$v2n);
111 :    
112 :     my @v1 = split(/\./,$v1);
113 :     my @v2 = split(/\./,$v2);
114 :     if (@v1 < @v2) { return 0 }
115 :    
116 :     for ($i=0; ($i < $#v2) && ($v1[$i] eq $v2[$i]); $i++) {}
117 :     if ($i == $#v2)
118 :     {
119 :     $v1[$i] =~ /^(.*[^\d])(\d+)$/;
120 :     $v1p = $1;
121 :     $v1n = $2;
122 :    
123 :     $v2[$i] =~ /^(.*[^\d])(\d+)$/;
124 :     $v2p = $1;
125 :     $v2n = $2;
126 :    
127 :     return (($v2p eq $v1p) && ($v2n < $v1n));
128 :     }
129 :     return 0;
130 :     }
131 :    
132 :     =pod
133 :    
134 :     =head1 package_code
135 :    
136 :     usage: &package_code($fig_disk,$file)
137 :    
138 :     $fig_base must be an absolute filename (begins with "/") giving the FIG from which
139 :     the updated code release will be taken.
140 :    
141 :     $file must be an absolute filename where the "code package" will be built.
142 :    
143 :     =cut
144 :    
145 :     sub package_code {
146 :     my($fig_disk,$file) = @_;
147 :    
148 :     &force_absolute($fig_disk);
149 :     &force_absolute($file);
150 : olson 1.34 my @tmp = &FIG::file_head("$fig_disk/CURRENT_RELEASE", 1);
151 : overbeek 1.2 my $current_release = $tmp[0];
152 : olson 1.34 chomp $current_release;
153 : overbeek 1.2
154 :     &FIG::run("cd $fig_disk/dist/releases; tar czf $file $current_release");
155 : efrank 1.1 }
156 :    
157 :     sub force_absolute {
158 :     my($file) = @_;
159 :    
160 :     if (substr($file,0,1) ne "/")
161 :     {
162 : overbeek 1.2 print "Error: Please use absolute file names (i.e., /Users/fig/... or /home/fig/...)\n";
163 :     exit;
164 : efrank 1.1 }
165 :     }
166 :    
167 :     =pod
168 :    
169 :     =head1 install_code
170 :    
171 :     usage: &install_code($fig_disk,$package)
172 :    
173 :     $fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.
174 :    
175 :     $package must be an absolute filename where the "code package" from which to make
176 :     the update exists.
177 :    
178 :     Note that this routine does not check that the updated code is compatible, or even less
179 :     current. It is assumed that upper level logic is doing that.
180 :    
181 :     =cut
182 :    
183 :     sub install_code {
184 :     my($fig_disk,$package) = @_;
185 :     my $fig_base = "$fig_disk/FIG";
186 :     &force_absolute($fig_base);
187 :     &force_absolute($package);
188 :    
189 :     if (getcwd() !~ /FIGdisk$/) { print die "Sorry, you must run this while in $FIG_Config::fig_disk" }
190 :    
191 :    
192 :     (! -d "$fig_disk/BackupCode") || &FIG::run("rm -rf $fig_disk/BackupCode");
193 :     mkdir("$fig_disk/BackupCode",0777) || die "Could not make the BackupCode directory";
194 :     (! -d "$fig_disk/BackupEnv") || &FIG::run("rm -rf $fig_disk/BackupEnv");
195 :     mkdir("$fig_disk/BackupEnv",0777) || die "Could not make the BackupEnv directory";
196 :    
197 :     my $version = &what_code_do_I_have($fig_base);
198 :     &FIG::run("cd $fig_disk; mv README install lib man env src $fig_disk/BackupEnv");
199 :     &FIG::run("cd $fig_base; mv VERSION Packages CGI $fig_disk/BackupCode");
200 :     print STDERR "made backups\n";
201 :    
202 :     &FIG::run("cd $fig_disk; tar xzf $package");
203 :     print STDERR "untarred new code\n";
204 :    
205 :     &fix_config("$fig_base/Packages/FIG_Config.pm","$fig_disk/BackupCode/Packages/FIG_Config.pm");
206 :     &FIG::run("cd $fig_base/bin; touch ToolTemplates/*/*; make all");
207 :     &FIG::run("cd $fig_base/CGI; touch ToolTemplates/*/*; make all");
208 :     print STDERR "installed new bin and CGI\n";
209 :    
210 :     &FIG::run("bring_system_up_to_date $version");
211 :     }
212 :    
213 :     =pod
214 :    
215 :     =head1 package_lightweight_code
216 :    
217 :     usage: &package_lightweight_code($fig_disk,$file)
218 :    
219 :     $fig_base must be an absolute filename (begins with "/") giving the FIG from which
220 :     the updated code release will be taken.
221 :    
222 :     $file must be an absolute filename where the "code package" will be built.
223 :    
224 :     =cut
225 :    
226 :     sub package_lightweight_code {
227 :     my($fig_disk,$file) = @_;
228 :    
229 :     &force_absolute($fig_disk);
230 :     &force_absolute($file);
231 : olson 1.34 my @tmp = &FIG::file_head("$fig_disk/CURRENT_RELEASE", 1);
232 : overbeek 1.2 my $current_release = $tmp[0];
233 : olson 1.34 chomp $current_release;
234 : overbeek 1.2
235 :     &FIG::run("cd $fig_disk/dist/releases; tar czf $file $current_release");
236 : efrank 1.1 }
237 :    
238 :     =pod
239 :    
240 :     =head1 install_lightweight_code
241 :    
242 :     usage: &install_lightweight_code($fig_disk,$package)
243 :    
244 :     $fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.
245 :    
246 :     $package must be an absolute filename where the "code package" from which to make
247 :     the update exists.
248 :    
249 :     Note that this routine does not check that the updated code is compatible, or even less
250 :     current. It is assumed that upper level logic is doing that.
251 :    
252 :     =cut
253 :    
254 :     sub install_lightweight_code {
255 :     my($fig_disk,$package) = @_;
256 :     my $fig_base = "$fig_disk/FIG";
257 :     &force_absolute($fig_base);
258 :     &force_absolute($package);
259 :    
260 : overbeek 1.2 if (! mkdir("$fig_disk/Tmp$$",0777))
261 :     {
262 :     print "Error: could not make $fig_disk/Tmp$$\n";
263 :     exit;
264 :     }
265 : efrank 1.1
266 : overbeek 1.2 &FIG::run("cd $fig_disk/Tmp$$; tar xzf $package");
267 :     if (! opendir(TMP,"$fig_disk/Tmp$$"))
268 :     {
269 :     print "Error: could not open $fig_disk/Tmp$$\n";
270 :     exit;
271 :     }
272 : efrank 1.1
273 : overbeek 1.2 my @rels = grep { $_ !~ /^\./ } readdir(TMP);
274 :     closedir(TMP);
275 :     if (@rels != 1)
276 :     {
277 :     print "Error: Bad code package: $package\n";
278 :     exit;
279 :     }
280 : efrank 1.1
281 : overbeek 1.2 my $new_release = $rels[0];
282 :     if (-d "$fig_disk/dist/releases/$new_release")
283 :     {
284 :     print "Error: $new_release already exists; we are doing nothing\n";
285 :     exit;
286 :     }
287 : efrank 1.1
288 : efrank 1.3 &FIG::run("mv $fig_disk/Tmp$$/$new_release $fig_disk/dist/releases");
289 :     &FIG::run("rm -rf $fig_disk/Tmp$$");
290 : olson 1.22
291 :     #
292 :     # Ugh. For now, find the arch in the fig config file $fig_disk/config/fig-user-env.sh"
293 :     #
294 :    
295 :     my $arch;
296 :     open(FH, "<$fig_disk/config/fig-user-env.sh");
297 :     while (<FH>)
298 :     {
299 :     if (/RTARCH="(.*)"/)
300 :     {
301 :     $arch = $1;
302 :     last;
303 :     }
304 :     }
305 :     close(FH);
306 :    
307 :     if ($arch eq "")
308 :     {
309 :     die "Couldn't determine SEED install architecture, not switching to release.";
310 :     }
311 :    
312 :     $ENV{RTARCH} = $arch;
313 : olson 1.32
314 :     #
315 :     # Need to put the ext_bin in the path.
316 :     #
317 :    
318 :     $ENV{PATH} .= ":$FIG_Config::ext_bin";
319 : olson 1.22
320 : efrank 1.9 &FIG::run("$FIG_Config::bin/switch_to_release $new_release");
321 : efrank 1.1 }
322 :    
323 :    
324 :     sub fix_config {
325 :     my($new,$old) = @_;
326 :     my($line,$i);
327 :    
328 : olson 1.34 my @new = &FIG::file_read($new);
329 :     foreach $line (&FIG::file_read($old))
330 : efrank 1.1 {
331 :     if ($line =~ /^(\S+)\s+\=/)
332 :     {
333 : olson 1.33 my $var = $1;
334 :     my $varQ = quotemeta $var;
335 : efrank 1.1
336 :     for ($i=0; ($i < $#new) && ($new[$i] !~ /^$varQ\s+\=/); $i++) {}
337 :     if ($i == $#new)
338 :     {
339 :     splice(@new,$i,0,$line);
340 :     }
341 :     else
342 :     {
343 :     splice(@new,$i,1,$line);
344 :     }
345 :     }
346 :     }
347 :     open(NEW,">$new") || confess "could not overwrite $new";
348 :     foreach $line (@new)
349 :     {
350 :     print NEW $line;
351 :     }
352 :     close(NEW);
353 :     }
354 :    
355 :     =pod
356 :    
357 :     =head1 what_genomes_will_I_sync
358 :    
359 :     usage: &what_genomes_will_I_sync($fig_base,$who)
360 :    
361 :     This routine returns the list of genome IDs that you are willing to sync with $who.
362 :    
363 :     =cut
364 :    
365 :     sub what_genomes_will_I_sync {
366 :     my($fig_base,$who) = @_;
367 :    
368 :     # This is the promiscuous version - it will sync all genomes with anyone.
369 :    
370 :     opendir(GENOMES,"$fig_base/Data/Organisms") || die "could not open $fig_base/Data/Organisms";
371 :     my @genomes = grep { $_ =~ /^\d+\.\d+$/ } readdir(GENOMES);
372 :     closedir(GENOMES);
373 :     return @genomes;
374 :     }
375 :    
376 :     =pod
377 :    
378 :     =head1 package_annotations
379 :    
380 : overbeek 1.29 usage: &package_annotations($fig,$genomes,$file)
381 : efrank 1.1
382 :     $genomes is a pointer to a list of genome IDs that will be exchanged.
383 :    
384 :     $file must be an absolute filename where the "annotation package" will be built.
385 :    
386 :     =cut
387 :    
388 :     sub package_annotations {
389 : overbeek 1.29 my($fig,$who,$date,$genomes,$file) = @_;
390 :     my $fig_base = "$FIG_Config::fig_disk/FIG";
391 : efrank 1.1
392 : overbeek 1.29 if (open(ANNOTATIONS,">$file"))
393 : efrank 1.1 {
394 : overbeek 1.29 my @annotations = sort { $a->[0] cmp $b->[0] } $fig->annotations_made($genomes,$who,$date);
395 : olson 1.33 foreach my $x (@annotations)
396 : efrank 1.1 {
397 : overbeek 1.29 print ANNOTATIONS join("\n",@$x),"\n///\n";
398 : efrank 1.1 }
399 : overbeek 1.29 print ANNOTATIONS "//\n";
400 : efrank 1.15
401 : olson 1.33 foreach my $x (@annotations)
402 : efrank 1.15 {
403 : olson 1.33 my $peg = $x->[0];
404 : overbeek 1.30 my @aliases = grep { $_ =~ /^(sp\||gi\||pirnr\||kegg\||N[PGZ]_)/ } $fig->feature_aliases($peg);
405 :     print ANNOTATIONS join("\t",($peg,join(",",@aliases),$fig->genus_species($fig->genome_of($peg)),scalar $fig->function_of($peg))) . "\n";
406 : efrank 1.15 }
407 : overbeek 1.29 print ANNOTATIONS "//\n";
408 :    
409 : olson 1.33 foreach my $x (@annotations)
410 : efrank 1.15 {
411 : olson 1.33 my $peg;
412 : overbeek 1.29 ($peg,undef) = @$x;
413 : overbeek 1.30 my $seq = $fig->get_translation($peg);
414 : overbeek 1.29 &FIG::display_id_and_seq($peg,\$seq,\*ANNOTATIONS);
415 : efrank 1.15 }
416 : overbeek 1.29 close(ANNOTATIONS);
417 : efrank 1.15 }
418 : efrank 1.1 }
419 :    
420 : overbeek 1.29
421 : efrank 1.1 =pod
422 :    
423 :     =head1 install_annotations
424 :    
425 :     usage: &install_annotations($fig_disk,$package)
426 :    
427 :     $fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.
428 :    
429 :     $package must be an absolute filename where the "annotations package" from which to make
430 :     the update exists.
431 :    
432 :     =cut
433 :    
434 :     sub install_annotations {
435 : overbeek 1.29 my($fig,$package) = @_;
436 :     my($user,$who,$date,$userR,@assignments,$peg,$aliases,$org,$func);
437 :     my(%pegs,%seq_of,@seq,$peg_to,$trans_pegs,$seq,$line,@ann,$ann);
438 : efrank 1.1 my($genome);
439 :    
440 : overbeek 1.29 my $fig_disk = $FIG_Config::fig_disk;
441 :     open(IN,"<$package") || die "could not open $package";
442 :     $/ = "\n//\n";
443 :     if (defined($line = <IN>))
444 :     {
445 : olson 1.33 my(@annotations);
446 :    
447 : overbeek 1.29 $line =~ s/\n\/\/\n/\n/s;
448 :     @ann = split(/\n\/\/\/\n/,$line);
449 :     foreach $ann (@ann)
450 :     {
451 :     if ($ann =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\n(.*)/s)
452 :     {
453 :     push(@annotations,[$1,$2,$3,$4]);
454 :     }
455 :     }
456 :     $/ = "\n";
457 :     while ($line && defined($line = <IN>) && ($line !~ /^\/\//))
458 :     {
459 : olson 1.34 chomp $line;
460 : overbeek 1.29 ($peg,$aliases,$org,$func) = split(/\t/,$line);
461 :     $pegs{$peg} = [$aliases,$org,$func];
462 :     }
463 :    
464 :     if ($line) { $line = <IN> }
465 :     while (defined($line) && ($line !~ /^\/\//))
466 :     {
467 :     if ($line =~ /^>(\S+)/)
468 :     {
469 :     $peg = $1;
470 :     @seq = ();
471 :     while (defined($line = <IN>) && ($line !~ /^[>\/]/) && ($line !~ /^\/\//))
472 :     {
473 :     push(@seq,$line);
474 :     $line = <IN>;
475 :     }
476 :     $seq = join("",@seq);
477 :     $seq =~ s/[ \n\t]//gs;
478 :     $seq_of{$peg} = uc $seq;
479 :     }
480 :     }
481 :     close(IN);
482 :     $trans_pegs = $fig->translate_pegs(\%pegs,\%seq_of);
483 :     @annotations = sort { ($a->[0] cmp $b->[0]) or ($a->[1] <=> $b->[1]) }
484 :     map { ($peg = $trans_pegs->{$_->[0]}) ? [$peg,$_->[1],$_->[2],$_->[3]] : () }
485 :     @annotations;
486 :    
487 :     if (-d "$fig_disk/BackupAnnotations") { system "rm -rf $fig_disk/BackupAnnotations" }
488 :     mkdir("$fig_disk/BackupAnnotations",0777);
489 :     mkdir("$fig_disk/BackupAnnotations/New",0777);
490 : olson 1.33 my $i;
491 :     for ($i=0; ($i < @annotations); $i++)
492 : overbeek 1.29 {
493 :     if (($i == 0) || ($fig->genome_of($annotations[$i]->[0]) ne $fig->genome_of($annotations[$i-1]->[0])))
494 :     {
495 :     if ($i != 0)
496 :     {
497 :     close(OUT);
498 :     }
499 :     $genome = $fig->genome_of($annotations[$i]->[0]);
500 :     open(OUT,">$fig_disk/BackupAnnotations/New/$genome")
501 :     || die "could not open $fig_disk/BackupAnnotations/New/$genome";
502 :     }
503 :     print OUT join("\n",@{$annotations[$i]}),"\n//\n";
504 :     }
505 :     if ($i > 0) { close(OUT) }
506 :     }
507 : efrank 1.15
508 : efrank 1.1 opendir(TMP,"$fig_disk/BackupAnnotations/New") || die "could not open $fig_disk/BackupAnnotations/New";
509 :     my @genomes = grep { $_ =~ /^\d+\.\d+$/ } readdir(TMP);
510 :     closedir(TMP);
511 :     foreach $genome (@genomes)
512 :     {
513 : efrank 1.14 next if (! -d "$fig_disk/FIG/Data/organisms/$genome");
514 : efrank 1.13
515 : efrank 1.1 print STDERR "installing $fig_disk/FIG/Data/Organisms/$genome/annotations\n";
516 :     if (-s "$fig_disk/FIG/Data/Organisms/$genome/annotations")
517 :     {
518 :     &FIG::run("cp -p $fig_disk/FIG/Data/Organisms/$genome/annotations $fig_disk/BackupAnnotations/$genome");
519 : efrank 1.9 &FIG::run("$FIG_Config::bin/merge_annotations $fig_disk/BackupAnnotations/$genome $fig_disk/BackupAnnotations/New/$genome > $fig_disk/FIG/Data/Organisms/$genome/annotations");
520 : efrank 1.1 }
521 :     else
522 :     {
523 :     &FIG::run("cp $fig_disk/BackupAnnotations/New/$genome $fig_disk/FIG/Data/Organisms/$genome/annotations");
524 :     }
525 : olson 1.37 chmod 0777,"$fig_disk/FIG/Data/Organisms/$genome/annotations";
526 : efrank 1.1 }
527 : efrank 1.9 &FIG::run("$FIG_Config::bin/index_annotations");
528 : efrank 1.1 }
529 :    
530 :     =pod
531 :    
532 :     =head1 restore_annotations
533 :    
534 :     usage: &restore_annotations($fig_disk);
535 :    
536 :     $fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.
537 :    
538 :     =cut
539 :    
540 :     sub restore_annotations {
541 :     my($fig_disk) = @_;
542 :    
543 :     &force_absolute($fig_disk);
544 :     (-d "$fig_disk/BackupAnnotations") || die "could not find an active backup";
545 :     opendir(TMP,"$fig_disk/BackupAnnotations") || die "could not open $fig_disk/BackupAnnotations";
546 :     my @genomes = grep { $_ =~ /^\d+\.\d+$/ } readdir(TMP);
547 :     closedir(TMP);
548 : olson 1.33 foreach my $genome (@genomes)
549 : efrank 1.1 {
550 :     unlink("$fig_disk/FIG/Data/Organisms/$genome/annotations");
551 :     &FIG::run("cp $fig_disk/BackupAnnotations/$genome $fig_disk/FIG/Data/Organisms/$genome/annotations");
552 : olson 1.37 system "chmod 777 $fig_disk/FIG/Data/Organisms/$genome/annotations";
553 : efrank 1.1 }
554 : efrank 1.9 &FIG::run("$FIG_Config::bin/index_annotations");
555 : efrank 1.1 }
556 :    
557 :     =pod
558 :    
559 :     =head1 package_aassignments
560 :    
561 : overbeek 1.20 usage: package_assignments($fig,$user,$who,$date,$genomes,$file)
562 : efrank 1.1
563 : overbeek 1.20 $user designates the user wishing to get the assignments
564 : efrank 1.1
565 : overbeek 1.5 $who designates whose assignments you want (defaults to "master")
566 :    
567 :     $date if given indicates a point in time (get assignments after that point)
568 :    
569 : efrank 1.1 $genomes is a pointer to a list of genome IDs that will be exchanged.
570 :    
571 :     $file must be an absolute filename where the "assignment package" will be built.
572 :    
573 :     =cut
574 :    
575 :     sub package_assignments {
576 : overbeek 1.20 my($fig,$user,$who,$date,$genomes,$file) = @_;
577 : overbeek 1.27 my($genome,$x,$org,$curr,$peg);
578 : overbeek 1.4 $who = $who ? $who : "master";
579 : overbeek 1.7 $date = $date ? $date : 0;
580 : overbeek 1.27
581 : overbeek 1.20 if (open(ASSIGNMENTS,">$file"))
582 : efrank 1.1 {
583 : overbeek 1.20 print ASSIGNMENTS "$user\t$who\t$date\n";
584 : olson 1.33 my @assignments = sort { $a->[0] cmp $b->[0] } $fig->assignments_made_full($genomes,$who,$date);
585 : overbeek 1.36 my @curr_assignments = ();
586 : overbeek 1.27 foreach $x (@assignments)
587 : efrank 1.1 {
588 : overbeek 1.36 my($peg, $function, undef, undef) = @$x;
589 :     if ($function eq $fig->function_of($peg,$who))
590 :     {
591 :     print ASSIGNMENTS join("\t", $peg, $function),"\n";
592 :     push(@curr_assignments,$x);
593 :     }
594 : overbeek 1.4 }
595 : overbeek 1.27 print ASSIGNMENTS "//\n";
596 :    
597 : overbeek 1.36 foreach $x (@curr_assignments)
598 : overbeek 1.27 {
599 :     ($peg,undef) = @$x;
600 : overbeek 1.30 my @aliases = grep { $_ =~ /^(sp\||gi\||pirnr\||kegg\||N[PGZ]_)/ } $fig->feature_aliases($peg);
601 : olson 1.33
602 :     my $alias_txt = join(",",@aliases);
603 :     my $gs_txt = $fig->genus_species($fig->genome_of($peg));
604 :     my $func_txt = scalar $fig->function_of($peg);
605 :    
606 :     print ASSIGNMENTS join("\t",($peg,
607 :     $alias_txt,
608 :     $gs_txt,
609 :     $func_txt)) . "\n";
610 : overbeek 1.28 }
611 :     print ASSIGNMENTS "//\n";
612 :    
613 : overbeek 1.36 foreach $x (@curr_assignments)
614 : overbeek 1.28 {
615 :     ($peg,undef) = @$x;
616 : overbeek 1.30 my $seq = $fig->get_translation($peg);
617 : overbeek 1.27 &FIG::display_id_and_seq($peg,\$seq,\*ASSIGNMENTS);
618 :     }
619 :     close(ASSIGNMENTS);
620 : efrank 1.1 }
621 :     }
622 :    
623 :     =pod
624 :    
625 :     =head1 install_assignments
626 :    
627 : overbeek 1.20 usage: &install_assignments($package)
628 : efrank 1.1
629 : overbeek 1.20 $package must be a filename where the "assignments package" from which to make
630 :     the assignment set exists
631 : efrank 1.1
632 :     =cut
633 :    
634 :     sub install_assignments {
635 : overbeek 1.35 my($fig,$package,$make_assignments) = @_;
636 : overbeek 1.29 my($user,$who,$date,$userR,@assignments,$peg,$aliases,$org,$func);
637 :     my(%pegs,%seq_of,@seq,$peg_to,$trans_pegs,$seq);
638 : efrank 1.1
639 : overbeek 1.20 open(IN,"<$package") || die "could not open $package";
640 :     my $line = <IN>;
641 : olson 1.34 chomp $line;
642 : overbeek 1.20 ($user,$who,$date) = split(/\t/,$line);
643 : olson 1.26 $userR = $user;
644 :     $userR =~ s/^master://;
645 : overbeek 1.29
646 : overbeek 1.30 while (defined($line = <IN>) && ($line !~ /^\/\//))
647 : overbeek 1.29 {
648 :     if ($line =~ /^(fig\|\d+\.\d+\.peg\.\d+)\t(\S.*\S)/)
649 :     {
650 :     push(@assignments,[$1,$2]);
651 :     }
652 :     }
653 :     while ($line && defined($line = <IN>) && ($line !~ /^\/\//))
654 :     {
655 : olson 1.34 chomp $line;
656 : overbeek 1.29 ($peg,$aliases,$org,$func) = split(/\t/,$line);
657 :     $pegs{$peg} = [$aliases,$org,$func];
658 :     }
659 :    
660 :     if ($line) { $line = <IN> }
661 :     while (defined($line) && ($line !~ /^\/\//))
662 :     {
663 :     if ($line =~ /^>(\S+)/)
664 :     {
665 :     $peg = $1;
666 :     @seq = ();
667 :     while (defined($line = <IN>) && ($line !~ /^[>\/]/) && ($line !~ /^\/\//))
668 :     {
669 :     push(@seq,$line);
670 :     $line = <IN>;
671 :     }
672 :     $seq = join("",@seq);
673 :     $seq =~ s/[ \n\t]//gs;
674 :     $seq_of{$peg} = uc $seq;
675 :     }
676 :     }
677 :     close(IN);
678 :     $trans_pegs = $fig->translate_pegs(\%pegs,\%seq_of);
679 :    
680 : olson 1.26 &FIG::verify_dir("$FIG_Config::data/Assignments/$userR");
681 : overbeek 1.29 my $file = &FIG::epoch_to_readable($date) . ":$who:imported";
682 : overbeek 1.20 $file =~ s/\//-/g;
683 : overbeek 1.35
684 :     if (! $make_assignments)
685 :     {
686 :     open(OUT,">$FIG_Config::data/Assignments/$userR/$file")
687 :     || die "could not open $FIG_Config::data/Assignments/$userR/$file";
688 :     }
689 : overbeek 1.29
690 :     foreach $peg (keys(%$trans_pegs))
691 : overbeek 1.20 {
692 : overbeek 1.29 $peg_to = $trans_pegs->{$peg};
693 :     $func = $pegs{$peg}->[2];
694 :     if ($fig->function_of($peg_to) ne $func)
695 :     {
696 : overbeek 1.35 if ($make_assignments)
697 :     {
698 :     if ($user =~ /master:(.*)/)
699 :     {
700 :     $userR = $1;
701 :     $fig->assign_function($peg_to,"master",$func,"");
702 :     if ($userR ne "none")
703 :     {
704 :     $fig->add_annotation($peg_to,$userR,"Set master function to\n$func\n");
705 :     }
706 :     }
707 :     else
708 :     {
709 :     $fig->assign_function($peg_to,$user,$func,"");
710 :     if ($user ne "none")
711 :     {
712 :     $fig->add_annotation($peg_to,$user,"Set function to\n$func\n");
713 :     }
714 :     }
715 :     }
716 :     else
717 :     {
718 :     print OUT "$peg_to\t$func\n";
719 :     }
720 : overbeek 1.29 }
721 : overbeek 1.20 }
722 : overbeek 1.35 if (! $make_assignments)
723 :     {
724 :     close(OUT);
725 :     if (! -s "$FIG_Config::data/Assignments/$userR/$file") { unlink("$FIG_Config::data/Assignments/$userR/$file") }
726 :     }
727 : efrank 1.1 }
728 :    
729 :     =pod
730 :    
731 :     =head1 package_translation_rules
732 :    
733 :     usage: &package_translation_rules($fig_base,$file)
734 :    
735 :     $fig_base must be an absolute filename (begins with "/") giving the FIG from which
736 :     the updated code release will be taken.
737 :    
738 :     $file must be an absolute filename where the "translation_rules package" will be built.
739 :    
740 :     =cut
741 :    
742 :     sub package_translation_rules {
743 :     my($fig_base,$file) = @_;
744 :    
745 :     &FIG::run("cp $fig_base/Data/Global/function.synonyms $file");
746 :     }
747 :    
748 :     =pod
749 :    
750 :     =head1 install_translation_rules
751 :    
752 : efrank 1.10 usage: &install_translation_rules($fig_disk,$from,$package)
753 : efrank 1.1
754 :     $fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.
755 :    
756 :     $package must be an absolute filename where the "translation_rules package" from which to make
757 :     the update exists.
758 :    
759 :     =cut
760 :    
761 :     sub install_translation_rules {
762 : efrank 1.10 my($fig_disk,$from,$package) = @_;
763 : efrank 1.1
764 :     my $file = "$fig_disk/FIG/Data/Global/function.synonyms";
765 :     &force_absolute($fig_disk);
766 :     if (-d "$fig_disk/BackupTranslation_Rules") { system "rm -rf $fig_disk/BackupTranslation_Rules" }
767 :     mkdir("$fig_disk/BackupTranslation_Rules",0777);
768 : disz 1.31 chmod 02777,"$fig_disk/BackupTranslation_Rules";
769 : efrank 1.1 if (-s $file)
770 :     {
771 :     &FIG::run("cp $file $fig_disk/BackupTranslation_Rules");
772 :     }
773 : efrank 1.10 &FIG::run("$FIG_Config::bin/merge_translation_rules $fig_disk/BackupTranslation_Rules/function.synonyms $package $from > $file");
774 : disz 1.31 chmod 02777,$file;
775 : efrank 1.1 }
776 :    
777 :     =pod
778 :    
779 :     =head1 restore_translation_rules
780 :    
781 :     usage: &restore_translation_rules($fig_disk);
782 :    
783 :     $fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.
784 :    
785 :     =cut
786 :    
787 :     sub restore_translation_rules {
788 :     my($fig_disk) = @_;
789 :    
790 :     &force_absolute($fig_disk);
791 :    
792 :     my $file = "$fig_disk/FIG/Data/Global/function.synonyms";
793 :     (-s "$fig_disk/BackupTranslation_Rules/function.synonyms") || die "could not find an active backup";
794 :     if (-s "$fig_disk/BackupTranslation_Rules/function.synonyms")
795 :     {
796 :     &FIG::run("cp $fig_disk/BackupTranslation_Rules/function.synonyms $file");
797 : olson 1.37 chmod 0777, $file;
798 : efrank 1.1 }
799 :     }
800 :    
801 : overbeek 1.23 sub package_subsystems {
802 : overbeek 1.30 my($fig,$file,$just_exchangable) = @_;
803 : overbeek 1.23 my($ssa);
804 :    
805 : overbeek 1.27 $just_exchangable = defined($just_exchangable) ? $just_exchangable : 1;
806 :     my @exchangable = grep { (! $just_exchangable) || $fig->is_exchangable_subsystem($_) }
807 :     $fig->all_subsystems;
808 :    
809 : overbeek 1.23 my $fig = new FIG;
810 : overbeek 1.24 if ((@exchangable > 0) && open(SUB,">$file"))
811 : overbeek 1.23 {
812 : overbeek 1.24 foreach $ssa (@exchangable)
813 : overbeek 1.23 {
814 : overbeek 1.25 # print STDERR "writing $ssa to $file\n";
815 :     my($spreadsheet,$notes) = $fig->exportable_subsystem($ssa);
816 : overbeek 1.23 print SUB join("",@$spreadsheet),join("",@$notes),"########################\n";
817 :     }
818 :     close(SUB);
819 :     }
820 : overbeek 1.25 else
821 :     {
822 :     # print STDERR &Dumper(\@exchangable,$file);
823 :     }
824 : overbeek 1.23 }
825 :    
826 : overbeek 1.30 sub install_subsystems {
827 :     my($fig,$package) = @_;
828 :    
829 : disz 1.31 &FIG::run("$FIG_Config::bin/import_subsystems master last_release < $package");
830 : overbeek 1.30 }
831 :    
832 : olson 1.37
833 :     =pod
834 :    
835 :     =head2 unpack_packaged_subsystem
836 :    
837 :     Unpack a packaged subsystem (from the clearinghouse or a p2p transfer)
838 :     into a directory; this will create a directory named as the subsystem
839 :     and formatted like the standard subsystem directories, as well as a
840 :     file of assignments and a file of sequences in fasta format.
841 :    
842 :     Returns the name of the subsystem.
843 :    
844 :     =cut
845 :    
846 :     sub unpack_packaged_subsystem
847 :     {
848 :     my($fig, $file, $target_dir) = @_;
849 :    
850 :     my $user = $fig->get_user();
851 :    
852 :     &FIG::verify_dir($target_dir);
853 :    
854 :     my $fh;
855 :    
856 :     if (!open($fh, "<$file"))
857 :     {
858 :     warn "unpack_packaged_subsystem: cannot open $file: $!";
859 :     return undef;
860 :     }
861 :    
862 :     #
863 :     # We scan the file, breaking it up into sections and writing
864 :     # to the appropriate places.
865 :     #
866 :     # First the header.
867 :     #
868 :    
869 :     local $/ = "\n//\n";
870 :    
871 :     my $header = <$fh>;
872 :     chomp $header;
873 :    
874 :     my ($name, $version, $exchangable, $curation) = split(/\n/, $header);
875 :    
876 :     print "Importing name=$name version=$version exch=$exchangable curation='$curation'\n";
877 :    
878 :     #
879 :     # Pull in roles, subsets, and spreadsheet. These will be written to the new
880 :     # spreadsheet file.
881 :     #
882 :    
883 :     my $roles = <$fh>;
884 :     chomp $roles;
885 :    
886 :     my $subsets = <$fh>;
887 :     chomp $subsets;
888 :    
889 :     my $spreadsheet = <$fh>;
890 :     chomp $spreadsheet;
891 :    
892 :    
893 :     #
894 :     # Pull the assignments and sequences. These go to their own files.
895 :     #
896 :    
897 :     my $assignments = <$fh>;
898 :     chomp $assignments;
899 :    
900 :     my $sequences = <$fh>;
901 :     chomp $sequences;
902 :    
903 :     #
904 :     # And the notes; these will be written to the subsystem dir.
905 :     #
906 :    
907 :     my $notes = <$fh>;
908 :     chomp $notes;
909 :    
910 :     close($fh);
911 :    
912 :     #
913 :     # Everything is read. Now to write it all back out again.
914 :     #
915 :    
916 :     #
917 :     # First the subsystem.
918 :     #
919 :    
920 :     my $ss_path = "$target_dir/subsystem";
921 :     &FIG::verify_dir($ss_path);
922 :    
923 :     open($fh, ">$ss_path/EXCHANGABLE");
924 :     print $fh "$exchangable\n";
925 :     close($fh);
926 :    
927 :     open($fh, ">$ss_path/VERSION");
928 :     print $fh "$version\n";
929 :     close($fh);
930 :    
931 :     open($fh, ">$ss_path/curation.log");
932 :     print $fh "$curation\n";
933 :     my $now = time;
934 :     print $fh "$now\t$user\timported\n";
935 :     close($fh);
936 :    
937 :     open($fh, ">$ss_path/notes");
938 :     print $fh "$notes\n";
939 :     close($fh);
940 :    
941 :     open($fh, ">$ss_path/spreadsheet");
942 :     print $fh "$roles\n";
943 :     print $fh "//\n";
944 :     print $fh "$subsets\n";
945 :     print $fh "//\n";
946 :     print $fh "$spreadsheet\n";
947 :     close($fh);
948 :    
949 :     open($fh, ">$target_dir/subsystem_name");
950 :     print $fh "$name\n";
951 :     close($fh);
952 :    
953 :     open($fh, ">$target_dir/assignments");
954 :     print $fh "$assignments\n";
955 :     close($fh);
956 :    
957 :     open($fh, ">$target_dir/seqs.fasta");
958 :     print $fh "$sequences\n";
959 :     close($fh);
960 :    
961 :     return $name;
962 :     }
963 :    
964 : olson 1.33 package SubsystemFile;
965 :    
966 :     use Data::Dumper;
967 :     use strict;
968 :    
969 :     sub new
970 :     {
971 :     my($class, $qdir, $file, $fig) = @_;
972 :     my(@info);
973 :    
974 :     @info = FIG::file_head($file, 4);
975 :     if (!@info)
976 :     {
977 :     warn "Cannot open $file\n";
978 :     return undef;
979 :     }
980 :    
981 :     chomp(@info);
982 :    
983 :     my $name = $info[0];
984 :     my $version = $info[1];
985 :     my $exc = $info[2];
986 :    
987 :     my @c = split(/\t/, $info[3]);
988 :    
989 :     my $curator = $c[1];
990 :    
991 :     my $self = {
992 :     qdir => $qdir,
993 :     file => $file,
994 :     name => $name,
995 :     version => $version,
996 :     exchangable => $exc,
997 :     curator => $curator,
998 : olson 1.39 curation_log => $info[3],
999 : olson 1.33 fig => $fig,
1000 :     };
1001 :    
1002 :     return bless($self, $class);
1003 :    
1004 :     }
1005 :    
1006 :     #
1007 :     # Load the export file into internal data structures.
1008 :     #
1009 :     # It's structured as
1010 :     #
1011 :     # name
1012 :     # version
1013 :     # exchangable
1014 :     # creation date <tab> curator <tab> "started"
1015 :     # //
1016 :     # roles
1017 :     # //
1018 :     # subsets
1019 :     # //
1020 :     # spreadsheet
1021 :     # //
1022 :     # assignments
1023 :     # //
1024 :     # sequences
1025 :     # //
1026 :     # notes
1027 :     # //
1028 :     #
1029 :     # Subsections:
1030 :     #
1031 :     # roles:
1032 :     #
1033 :     # abbr <tab> role-name
1034 :     #
1035 :     # subsets has meaning to the acutal subsystems, but we'll use it as a string.
1036 :     #
1037 :     # spreadsheet:
1038 :     #
1039 :     # genome <tab> variant <tab> items
1040 :     #
1041 :     # Where items is tab-separated columns, each of which is comma-separated peg number in the genome
1042 :     #
1043 :     # assignments:
1044 :     #
1045 :     # fid <tab> aliases <tab> organism <tab> function
1046 :     #
1047 :     # sequences:
1048 :     #
1049 :     # list of fasta's
1050 :     #
1051 :     # notes:
1052 :     #
1053 :     # plain text
1054 :     #
1055 :     sub load
1056 :     {
1057 :     my($self) = @_;
1058 :    
1059 :     my $fig = $self->{fig};
1060 :    
1061 :     my($fh);
1062 :    
1063 :     open($fh, "<$self->{file}") or die "Cannot open $self->{file}: $!\n";
1064 :    
1065 :     #
1066 : olson 1.39 # Skip intro section - we already read this information in the constructor.
1067 : olson 1.33 #
1068 :    
1069 :     while (<$fh>)
1070 :     {
1071 :     chomp;
1072 :     last if m,^//,;
1073 :     }
1074 :    
1075 :     #
1076 :     # Read the roles.
1077 :     #
1078 :    
1079 :    
1080 :     my $nroles;
1081 :    
1082 :     while (<$fh>)
1083 :     {
1084 :     last if m,^//,;
1085 :    
1086 :     $self->{role_text} .= $_;
1087 :     chomp $_;
1088 :    
1089 :     my($abbr, $role) = split(/\t/);
1090 :    
1091 :     warn "Have role $role\n";
1092 :    
1093 :     push(@{$self->{roles}}, $role);
1094 :     push(@{$self->{abbrs}}, $abbr);
1095 :    
1096 :     $nroles++;
1097 :     }
1098 :    
1099 :     #
1100 :     # Read in subsets as a string.
1101 :     #
1102 :    
1103 :     while (<$fh>)
1104 :     {
1105 :     last if m,^//,;
1106 :    
1107 :     $self->{subsets_text} .= $_;
1108 :     }
1109 :    
1110 :     #
1111 :     # Read the spreadsheet.
1112 :     #
1113 :    
1114 :     while (<$fh>)
1115 :     {
1116 :     last if m,^//,;
1117 :    
1118 :     $self->{spreadsheet_text} .= $_;
1119 :    
1120 :     chomp;
1121 :    
1122 :     my($genome, $variant, @items) = split(/\t/, $_, $nroles + 2);
1123 :    
1124 :     push(@{$self->{genomes}}, $genome);
1125 :    
1126 :     my $gobj = GenomeObj->new($self, $fig, $genome, $variant, [@items]);
1127 :    
1128 :     $self->{genome_objs}->{$genome} = $gobj;
1129 :     }
1130 :    
1131 :     #
1132 :     # Read PEG info
1133 :     #
1134 :    
1135 :     while (<$fh>)
1136 :     {
1137 :     last if m,^//,;
1138 :    
1139 :     chomp;
1140 :    
1141 :     my ($peg, $aliases, $org, $func) = split(/\t/);
1142 :    
1143 :     push(@{$self->{pegs}}, [$peg, $aliases, $org, $func]);
1144 :     }
1145 :    
1146 :     #
1147 :     # Read sequence info
1148 :     #
1149 :    
1150 :     my($cur, $cur_peg);
1151 :    
1152 :     while (<$fh>)
1153 :     {
1154 :     if (/^>(fig\|\d+\.\d+\.peg\.\d+)/)
1155 :     {
1156 :     if ($cur)
1157 :     {
1158 :     $cur =~ s/\s+//gs;
1159 :     $self->{peg_seq}->{$cur_peg} = $cur;
1160 :     }
1161 :     $cur_peg = $1;
1162 :     $cur = '';
1163 :     }
1164 :     elsif (m,^//,)
1165 :     {
1166 :     $cur =~ s/\s+//gs;
1167 :     $self->{peg_seq}->{$cur_peg} = $cur;
1168 :     last;
1169 :     }
1170 :     else
1171 :     {
1172 :     $cur .= $_;
1173 :     }
1174 :     }
1175 :    
1176 :     #
1177 :     # Read notes as a string
1178 :     #
1179 :    
1180 :     while (<$fh>)
1181 :     {
1182 :     last if m,^//,;
1183 :    
1184 :     $self->{notes_txt} .= $_;
1185 :     }
1186 :    
1187 :     }
1188 :    
1189 :     #
1190 : olson 1.39 # Compute or load from cache the PEG translations for this subsystem.
1191 :     #
1192 :     sub ensure_peg_translations
1193 :     {
1194 :     my($self) = @_;
1195 :    
1196 :     #
1197 :     # First we map the PEGs in this subsystem to PEGs in the
1198 :     # local SEED.
1199 :     #
1200 :     # translate_pegs requires a hash of peg->[aliases] as the first argument,
1201 :     # and a hash of peg->sequence as the second argument.
1202 :     #
1203 :    
1204 :     my $fig = $self->{fig};
1205 :    
1206 :     my %pegs;
1207 :     my %seqs_of;
1208 :    
1209 :     for my $pegent (@{$self->{pegs}})
1210 :     {
1211 :     my($peg, $aliases, $org, $func) = @$pegent;
1212 :     $pegs{$peg} = [$aliases, $org, $func];
1213 :     $seqs_of{$peg} = $self->{peg_seq}->{$peg};
1214 :     }
1215 :    
1216 :     sub show_cb
1217 :     {
1218 :     print "$_[0]<p>\n";
1219 :     }
1220 :    
1221 :     my $cached_translation_file = "$self->{qdir}/peg_translation";
1222 :    
1223 :     my $tran_peg;
1224 :    
1225 :     if (-f $cached_translation_file and -s $cached_translation_file > 0)
1226 :     {
1227 :     #
1228 :     # Read the cached translations.
1229 :     #
1230 :    
1231 :     if (open(my $fh, "<$cached_translation_file"))
1232 :     {
1233 :     warn "Reading cached peg translations\n";
1234 :     $tran_peg = {};
1235 :     while (<$fh>)
1236 :     {
1237 :     chomp;
1238 :     my($k, $v) = split(/\t/);
1239 :     $tran_peg->{$k} = $v;
1240 :     }
1241 :     close($fh);
1242 :     }
1243 :     }
1244 :    
1245 :     if (!$tran_peg)
1246 :     {
1247 :     $tran_peg = $fig->translate_pegs(\%pegs, \%seqs_of, \&show_cb);
1248 :    
1249 :     #
1250 :     # tran_peg is now a hash from subsystem_peg->local_peg
1251 :     #
1252 :    
1253 :     #
1254 :     # Write the translations out to a file in the queue directory
1255 :     # for use during installation.
1256 :     #
1257 :    
1258 :     if (open(my $fh, ">$self->{qdir}/peg_translation"))
1259 :     {
1260 :     for my $p (keys(%$tran_peg))
1261 :     {
1262 :     my $tp = $tran_peg->{$p};
1263 :     print $fh "$p\t$tp\n";
1264 :     }
1265 :     close($fh);
1266 :     }
1267 :     }
1268 :     $self->{tran_peg} = $tran_peg;
1269 :     return $tran_peg;
1270 :     }
1271 :    
1272 :     #
1273 : olson 1.33 # Analyze this subsystem for compatibility with this SEED install.
1274 :     #
1275 :     # Returns three lists:
1276 :     #
1277 :     # A major conflict list, consisting of tuples
1278 :     # [$ss_peg, $ss_func, $loc_peg, $loc_func, $subs] where $ss_peg
1279 :     # is the peg in the subsystem being analyzied, and $ss_func is
1280 :     # its assigned function in that subsystem. $loc_peg is the peg
1281 :     # in the local SEED, and $loc_func its local assignment. $subs is
1282 :     # the list of pairs [$subsystem_name, $role] denoting the subsystem(s)
1283 :     # that $loc_peg particpates in.
1284 :     #
1285 :     # A conflict is flagged if the local function is different than
1286 :     # the one being imported, and if the local peg is in a subsystem.
1287 :     #
1288 :     # A minor conflict list, consisting of tuples [$ss_peg, $ss_func, $loc_peg, $loc_func].
1289 :     #
1290 :     #
1291 :     # The second list is a list of subsystem pegs that do not have
1292 :     # a local equivalent. Each entry is a triple
1293 :     # [peg, orgname, function].
1294 :     #
1295 :    
1296 :     sub analyze
1297 :     {
1298 :     my($self) = @_;
1299 :     my $fig = $self->{fig};
1300 :    
1301 : olson 1.39 my $tran_peg = $self->ensure_peg_translations();
1302 :    
1303 :     #
1304 :     # Now we walk the PEGs, determining a) which are missing
1305 :     # in the local SEED, and b) which have a conflicting assignment.
1306 :     #
1307 : olson 1.33 #
1308 : olson 1.39 # We also need to determine if this assignment will cause
1309 :     # pegs to be filled into subsystem roles that were not
1310 :     # otherwise going to be added.
1311 : olson 1.33 #
1312 : olson 1.39 # To enable this, we determine from the subsystem index
1313 :     # the list all roles that are present in subsystems on
1314 :     # this SEED.
1315 : olson 1.33 #
1316 :    
1317 : olson 1.39 my $sub_name = $self->name();
1318 :     my $subsystem_roles = $fig->subsystem_roles();
1319 :    
1320 :     my(@conflict, $minor_conflict, $missing);
1321 :    
1322 :     $missing = [];
1323 :    
1324 :     print "Determining conflicts...<p>\n";
1325 : olson 1.33
1326 :     for my $pegent (@{$self->{pegs}})
1327 :     {
1328 : olson 1.39 my($ss_peg, undef, $ss_org, $ss_func) = @$pegent;
1329 :    
1330 :     #
1331 :     # If this peg has a local translation, determine if
1332 :     # the associated assignment conflicts with a local assignment.
1333 :     #
1334 :     # One type of conflict occurs when the new assignment would cause
1335 :     # the peg to be removed from a subsystem. This occurs when the
1336 :     # new functional assignment is different from the current
1337 :     # assignment, and the peg is already in a subsystem.
1338 :     #
1339 :     # Another type of conflict occurs when the new assignment
1340 :     # for a peg matches a role that exists in a locally-installed
1341 :     # subsystem. This will cause the peg to be added to a
1342 :     # subsystem upon refill of that subsystem.
1343 :     #
1344 :     # It is possible for both the above conditions to hold,
1345 :     # in which case a peg would be moved out of one
1346 :     # subsystem into another.
1347 :     #
1348 :     # We denote these cases in the generated conflict list by
1349 :     # annotating the entry with the list of subsystems from which
1350 :     # the peg would be removed if the assignment were to be
1351 :     # accepted, and the list of subsystems to which the
1352 :     # peg would be added.
1353 :     #
1354 :    
1355 :     if (my $loc_peg = $tran_peg->{$ss_peg})
1356 :     {
1357 :     my $subs_removed = [];
1358 :     my $subs_added = [];
1359 :    
1360 :     #
1361 :     # Determine what our locally-assigned function is, and what
1362 :     # subsystem this peg appears in.
1363 :     #
1364 :    
1365 :     my $loc_func = $fig->function_of($loc_peg);
1366 :    
1367 :     #
1368 :     # If the functions don't match, it's a conflict.
1369 :     # If the local function is in a subsystem, it's a major
1370 :     # conflict. If it's not, it's a minor conflict.
1371 :     #
1372 :     # We actually let the major/minor determination be done by
1373 :     # the analysis display code, since the difference is only in whether
1374 :     # there are subsystems.
1375 :     #
1376 :    
1377 :     if ($loc_func ne $ss_func)
1378 :     {
1379 :    
1380 :     #
1381 :     # If the function defined in the new subsystem is different than
1382 :     # the current function, we mark a conflict. Along with the conflict
1383 :     # we include a list of the subsystems in which the local peg
1384 :     # is included.
1385 :     #
1386 :     # We use the subsystems_for_peg method to determine local subsystems
1387 :     # associated with a peg. It returns a list of pairs [subsystem, rolename].
1388 :     #
1389 :    
1390 :     #
1391 :     # What if we are loading a new version of an existing subsystem, and
1392 :     # a role name has changed?
1393 :     #
1394 :     # In this case, $loc_func ne $ss_func, $loc_peg will appear in the local copy of
1395 :     # the subsystem we are loading, and hence as a candidate for removal from that subsystem.
1396 :     # This may be thought of as a spurious message, and leads me to want to remove
1397 :     # such warnings (if I'm updating a subsystem, I can expect that the pegs in that
1398 :     # subsystem will change).
1399 :     #
1400 :    
1401 :     my @removed = $fig->subsystems_for_peg($loc_peg);
1402 :     push(@$subs_removed, grep { $_->[0] ne $sub_name } @removed);
1403 :    
1404 :     #
1405 :     # We also check to see if the new function is present
1406 :     # as a role in any local subsystem. If it is, then when that subsystem
1407 :     # is refilled, this peg will appear in it.
1408 :     #
1409 :     # This is also
1410 :     #
1411 :    
1412 :     if (my $loc_ss = $subsystem_roles->{$ss_func})
1413 :     {
1414 :     push(@$subs_added, @$loc_ss);
1415 :     }
1416 :    
1417 :     push(@conflict, [$ss_peg, $ss_func, $loc_peg, $loc_func, $subs_removed, $subs_added]);
1418 :     }
1419 :    
1420 :     }
1421 :     else
1422 :     {
1423 :     push(@$missing, [$ss_peg, $ss_org, $ss_func]);
1424 :     }
1425 :     }
1426 :    
1427 :     my $conflict = [sort { @{$b->[4]} + @{$b->[5]} <=> @{$a->[4]} + @{$a->[5]} } @conflict];
1428 :    
1429 :     return ($conflict, $missing);
1430 :     }
1431 :    
1432 :     sub read_cached_analysis
1433 :     {
1434 :     my($self) = @_;
1435 :    
1436 :     my $cfile = "$self->{qdir}/conflicts";
1437 :     my $mfile = "$self->{qdir}/missing";
1438 :    
1439 :     my($conflict, $missing);
1440 :     $conflict = [];
1441 :     $missing = [];
1442 :    
1443 :     if (open(my $fh, "<$cfile"))
1444 :     {
1445 :    
1446 :     while (<$fh>)
1447 :     {
1448 :     chomp;
1449 :    
1450 :     my($ss_peg, $ss_func, $loc_peg, $loc_func) = split(/\t/);
1451 :    
1452 :     my $subs_removed = <$fh>;
1453 :     my $subs_added = <$fh>;
1454 :    
1455 :     chomp($subs_removed);
1456 :     chomp($subs_added);
1457 :    
1458 :     my @subs_removed_raw = split(/\t/, $subs_removed);
1459 :     my $subs_added_list = [split(/\t/, $subs_added)];
1460 :    
1461 :     my $subs_removed_list = [];
1462 :    
1463 :     while (@subs_removed_raw)
1464 :     {
1465 :     my($v1, $v2, @rest) = @subs_removed_raw;
1466 :     @subs_removed_raw = @rest;
1467 :     push(@$subs_removed_list, [$v1, $v2]);
1468 :     }
1469 :    
1470 :     push(@$conflict, [$ss_peg, $ss_func, $loc_peg, $loc_func, $subs_removed_list, $subs_added_list]);
1471 :     }
1472 :     }
1473 :    
1474 :     if (open(my $fh, "<$mfile"))
1475 :     {
1476 :    
1477 :     while (<$fh>)
1478 :     {
1479 :     chomp;
1480 :    
1481 :     my(@a) = split(/\t/);
1482 :     push(@$missing, [@a]);
1483 :     }
1484 :     }
1485 :    
1486 :     return($conflict, $missing);
1487 :     }
1488 :    
1489 :     #
1490 :     # Install this subsystem.
1491 :     #
1492 :     # $dont_assign is a list of local PEGs that should not have their assignments overwritten.
1493 :     #
1494 :     # We return a list of for-the-installer messages that will be presented when the install completes.
1495 :     #
1496 :     sub install
1497 :     {
1498 :     my($self, $dont_assign) = @_;
1499 :    
1500 :     my @messages;
1501 :    
1502 :     my $fig = $self->{fig};
1503 :     my $subsystems_dir = "$FIG_Config::data/Subsystems";
1504 :    
1505 :     my $sub_name = $self->name();
1506 :     $sub_name =~ s/ /_/g;
1507 :     my $sub_dir = "$subsystems_dir/$sub_name";
1508 :     my $ver = $self->version();
1509 :    
1510 :     #
1511 :     # First check to see if we already have this subsystem installed.
1512 :     #
1513 :    
1514 :     if (-d $sub_dir and (my $cur_ver = $fig->subsystem_version($sub_name)) >= $ver)
1515 :     {
1516 :     warn "Not importing $sub_name: current version $cur_ver >= imported version $ver";
1517 :     }
1518 :    
1519 :     warn "Importing $sub_name version $ver\n";
1520 :     push(@messages, "Importing $sub_name version $ver\n");
1521 :    
1522 :     if (! -d $sub_dir)
1523 :     {
1524 :     mkdir($sub_dir, 0777) or die "Cannot mkdir $sub_dir: $!";
1525 : olson 1.33 }
1526 :    
1527 : olson 1.39 #
1528 :     # Write the header/meta information.
1529 :     #
1530 :    
1531 :     my $fh;
1532 :     my $imported_from = "???";
1533 :    
1534 :     open($fh, ">$sub_dir/VERSION") or die "Cannot open $sub_dir/VERSION for writing: $!";
1535 :     print $fh "$ver\n";
1536 :     close($fh);
1537 :     chmod(0666, "$sub_dir/VERSION");
1538 :    
1539 :     open($fh, ">$sub_dir/EXCHANGABLE") or die "Cannot open $sub_dir/EXCHANGABLE for writing: $!";
1540 :     print $fh $self->exchangable() . "\n";
1541 :     close($fh);
1542 :     chmod(0666, "$sub_dir/EXCHANGABLE");
1543 :    
1544 :     open($fh, ">$sub_dir/curation.log") or die "Cannot open $sub_dir/curation.logt for writing: $!";
1545 :     print $fh "$self->{curation_log}\n";
1546 :     my $time = time;
1547 :     print $fh "$time\t$imported_from\timported_from\n";
1548 :     close($fh);
1549 :     chmod(0666, "$sub_dir/curation.log");
1550 :    
1551 :     open($fh, ">$sub_dir/notes") or die "Cannot open $sub_dir/notes for writing: $!";
1552 :     print $fh $self->{notes_txt} . "\n";
1553 :     close($fh);
1554 :     chmod(0666, "$sub_dir/notes");
1555 :    
1556 :     my $tran_peg = $self->ensure_peg_translations();
1557 : olson 1.33
1558 :     #
1559 : olson 1.39 # We can start writing the spreadsheet.
1560 : olson 1.33 #
1561 :    
1562 : olson 1.39 my $ssa_fh;
1563 :     open($ssa_fh, ">$sub_dir/spreadsheet") or die "Cannot open $sub_dir/spreadsheet for writing: $!";
1564 :    
1565 : olson 1.33 #
1566 : olson 1.39 # Start with the roles and subsets.
1567 : olson 1.33 #
1568 : olson 1.39
1569 :     print $ssa_fh $self->{role_text};
1570 :     print $ssa_fh "//\n";
1571 :    
1572 :     print $ssa_fh $self->{subsets_text};
1573 :     print $ssa_fh "//\n";
1574 : olson 1.33
1575 : olson 1.39 for my $g (@{$self->{genomes}})
1576 : olson 1.33 {
1577 : olson 1.39 my $gobj = $self->{genome_objs}->{$g};
1578 :     my ($trans_genome, @row) = $gobj->translate($tran_peg);
1579 :    
1580 :     if ($trans_genome)
1581 : olson 1.33 {
1582 : olson 1.39 print $ssa_fh join("\t", $trans_genome, $gobj->{variant}, @row), "\n";
1583 : olson 1.33 }
1584 :     }
1585 : olson 1.39
1586 :     close($ssa_fh);
1587 :    
1588 :     #
1589 :     # The subsystem itself is now in place.
1590 : olson 1.33 #
1591 : olson 1.39 # Enter the new assignments, saving the old assignments in the spool dir.
1592 : olson 1.33 #
1593 :    
1594 : olson 1.39 my $now = time;
1595 :    
1596 :     my $old_funcs_fh;
1597 :     open($old_funcs_fh, ">>$self->{qdir}/old_assignments.$now");
1598 :    
1599 :     my $curator = $self->curator();
1600 : olson 1.33
1601 :     for my $pegent (@{$self->{pegs}})
1602 :     {
1603 : olson 1.39 my($peg, $aliases, $org, $func) = @$pegent;
1604 :     my $tpeg = $tran_peg->{$peg};
1605 :    
1606 :     if (!$tpeg)
1607 :     {
1608 :     warn "Couldn't translate $peg (from $org)\n";
1609 :     push(@messages, "Couldn't translate $peg (from $org)");
1610 :     next;
1611 :     }
1612 :    
1613 :     my $old = $fig->function_of($tpeg);
1614 :    
1615 :     if ($old ne $func)
1616 : olson 1.33 {
1617 : olson 1.39 print $old_funcs_fh "$tpeg\t$old\t$curator\t$func\n";
1618 :     $fig->add_annotation($tpeg, $curator,
1619 :     "Assigning function $func based on installation of subsystem $self->{name}");
1620 : olson 1.33
1621 : olson 1.39 if ($curator =~ /master:(.*)/)
1622 :     {
1623 :     my $user = $1;
1624 :     $fig->assign_function($tpeg, "master", $func, "");
1625 :     $fig->add_annotation($tpeg, $user, "Set master function to\n$func\n");
1626 :     }
1627 :     else
1628 : olson 1.33 {
1629 : olson 1.39 $fig->assign_function($tpeg, $curator, $func, "");
1630 :     $fig->add_annotation($tpeg, $curator, "Set function to\n$func\n");
1631 : olson 1.33 }
1632 :     }
1633 :     else
1634 :     {
1635 : olson 1.39 # print "$tpeg already has assignment $func\n";
1636 : olson 1.33 }
1637 :     }
1638 : olson 1.39 close($old_funcs_fh);
1639 :     return @messages;
1640 : olson 1.33 }
1641 :    
1642 :     sub name
1643 :     {
1644 :     my($self) = @_;
1645 :     return $self->{name};
1646 :     }
1647 :    
1648 :    
1649 :     sub version
1650 :     {
1651 :     my($self) = @_;
1652 :     return $self->{version};
1653 :     }
1654 :    
1655 :     sub exchangable
1656 :     {
1657 :     my($self) = @_;
1658 :     return $self->{exchangable};
1659 :     }
1660 :    
1661 :     sub curator
1662 :     {
1663 :     my($self) = @_;
1664 :     return $self->{curator};
1665 :     }
1666 :    
1667 : olson 1.39 sub analysis_complete
1668 :     {
1669 :     my($self) = @_;
1670 :    
1671 :     return -f "$self->{qdir}/analysis_complete";
1672 :     }
1673 :    
1674 :     sub analysis_jobid
1675 :     {
1676 :     my($self) = @_;
1677 :    
1678 :     my $jid_file = "$self->{qdir}/analysis_jobid";
1679 :    
1680 :     return &FIG::file_head($jid_file, 1);
1681 :     }
1682 :    
1683 : olson 1.33 package GenomeObj;
1684 :    
1685 : olson 1.39 use strict;
1686 :     use Data::Dumper;
1687 :    
1688 :     #
1689 :     # A genomeobj is a small datatype that holds the data in a row of a
1690 :     # spreadsheet file.
1691 :     #
1692 :    
1693 : olson 1.33 sub new
1694 :     {
1695 :     my($class, $subfile, $fig, $genome, $variant, $items) = @_;
1696 :    
1697 :     my $self = {
1698 :     fig => $fig,
1699 :     subfile => $subfile,
1700 :     genome => $genome,
1701 :     variant => $variant,
1702 :     items => $items,
1703 :     };
1704 :     return bless($self, $class);
1705 :    
1706 :     }
1707 :    
1708 : olson 1.39 #
1709 :     # Translate this row to a new context.
1710 :     #
1711 :     # $trans_peg is a hash mapping from spreadsheet PEG to local PEG
1712 :     #
1713 :     sub translate
1714 :     {
1715 :     my($self, $trans_peg) = @_;
1716 :     my $fig = $self->{fig};
1717 :    
1718 :     my $genome = $self->{genome};
1719 :    
1720 :     my $parsed_items = [];
1721 :     $self->{parsed_items} = $parsed_items;
1722 :     my $trans_items = [];
1723 :     $self->{trans_items} = $trans_items;
1724 :    
1725 :     #
1726 :     # Hash of genomes seen in this row.
1727 :     my %genomes;
1728 :    
1729 :     for my $item (@{$self->{items}})
1730 :     {
1731 :     my $l = [ map { $_ eq '' ? undef : "fig|$genome.peg.$_" } split(/,/, $item) ];
1732 :     my $t = [ map { $trans_peg->{$_} } @$l ];
1733 :    
1734 :     push(@$parsed_items, $l);
1735 :     push(@$trans_items, $t);
1736 :    
1737 :     #
1738 :     # Count the genomes that are seen in the translated pegs.
1739 :     #
1740 :    
1741 :     for my $tpeg (@$t)
1742 :     {
1743 :     my $tg = $fig->genome_of($tpeg);
1744 :     $genomes{$tg}++ if $tg ne "";
1745 :     }
1746 :    
1747 :     }
1748 :    
1749 :     #
1750 :     # Now determine the dominant organism for this translated row.
1751 :     #
1752 :    
1753 :     my @orgs = sort { $genomes{$b} <=> $genomes{$a} } keys(%genomes);
1754 :    
1755 :     # print "@{$self->{items}}\n";
1756 :     # print join(" ", map { "$_: $genomes{$_} " } @orgs ), "\n";
1757 :    
1758 :     unless (@orgs == 1 # Single organism
1759 :     or
1760 :     (@orgs > 1 and $genomes{$orgs[0]} > (2 * $genomes{$orgs[1]})) # First org has > 2x the second org
1761 :     )
1762 :     {
1763 :     warn "Could not determine translation for $genome\n";
1764 :     return undef;
1765 :     }
1766 :    
1767 :     #
1768 :     # The dominant organism is the first in the list.
1769 :     #
1770 :    
1771 :     my $dom = $orgs[0];
1772 :    
1773 :     #
1774 :     # Run through the translated pegs, and remove the ones that are
1775 :     # not in the dominant organism.
1776 :     #
1777 :    
1778 :     my @res;
1779 :     for my $i (0..@$trans_items - 1)
1780 :     {
1781 :     my $t = $trans_items->[$i];
1782 :    
1783 :     my @nt;
1784 :     for my $peg (@$t)
1785 :     {
1786 :     if ($peg =~ /^fig\|(\d+\.\d+)\.peg\.(\d+)$/)
1787 :     {
1788 :     if ($1 eq $dom)
1789 :     {
1790 :     push(@nt, $2);
1791 :     }
1792 :     }
1793 :     }
1794 :     push(@res, join(",", @nt));
1795 :     }
1796 :     return $dom, @res;
1797 :     }
1798 : olson 1.33
1799 : efrank 1.1 1

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3