[Bio] / FigKernelPackages / P2Pupdate.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/P2Pupdate.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.62 - (view) (download) (as text)

1 : olson 1.56 #
2 : golsen 1.62 # Copyright (c) 2003-2008 University of Chicago and Fellowship
3 : olson 1.56 # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : efrank 1.1 package P2Pupdate;
19 :    
20 : olson 1.33 use strict;
21 :    
22 : olson 1.41 use Safe;
23 : efrank 1.1 use FIG_Config;
24 :     use FIG;
25 :     use Carp;
26 :     use Data::Dumper;
27 :     use Cwd;
28 :    
29 :     =pod
30 :    
31 : parrello 1.61 =head2 Public Methods
32 :    
33 :     =head3 updating code
34 : efrank 1.1
35 :     This routine packages what is needed to upgrade an older system to the
36 :     current code. Code releases are numered
37 :    
38 :     p1n1.p2n2.p3n3...
39 :    
40 :     where "." is added at the point the code moved to another branch of
41 :     the tree. FIG, who provided the initial release of the SEED, will
42 :     number all of their code releases as
43 :    
44 :     FIGn
45 :    
46 :     where n is an integer. Suppose that between releases 13 and 14 a
47 :     second group (which we will term "Idiots" for convenience) took
48 :     release 13 and wished to branch the code tree. At that point, they
49 :     would name their first release as
50 :    
51 :     FIG13.Idiots1
52 :    
53 :     We are, of course, being both cavalier and nasty when we make such a
54 :     reference. We do, however, wish to express the view that it will
55 :     benefit everyone to attempt to reconcile differences and maintain a
56 :     single code progression as long as possible. There are often good
57 :     reasons to part ways, but we urge people to think carefully before
58 :     taking such a step.
59 :    
60 :     Two code releases
61 :    
62 :     i1.i2.i3...in
63 :     and j1.j2.j3...jm with m <= n
64 :    
65 :     are compatible iff
66 :    
67 :     ip == jp for p < m, and
68 :     jm and im have the same "source" and
69 :     jm <= im
70 :    
71 :     A new code release must have the property that it can bring any
72 :     "older" compatible release up to its release.
73 :    
74 :     Note that there is an issue relating to the code to build/install packages.
75 :     Since a system may be radically restructured between releases of code, the
76 :     code to build a "package" and the code to "install" a package are radically
77 :     separated. For example, the code in P2Pupdate.pm for building an assignment
78 :     package and the code for installing an assignment package both apply to the
79 :     release of code current on the system containing P2Pupdate.pm. In fact, the
80 :     code releases may be quite different on two synchronizing systems.
81 :    
82 :     To make things work the following rules must be observed:
83 :    
84 :     1. a code release is a tar file containing VERSION, Packages,
85 :     bin/ToolTemplates, and CGI/ToolTemplates. The installing system needs
86 :     to place these at the appropriate spots, and then run bring_system_up_to_date,
87 :     which is supposed to do any required restructuring.
88 :    
89 :     2. an assignments package is a tar file containing a single directory. The directory
90 :     contains subdirectories -- one per genome. Each genome subdirectory contains zero
91 :     or more files. The name of the file is the "user" and the contents will be the
92 :     assignments made by that user.
93 :    
94 :     3. an annotations package is a tar file containing a single directory. The files in
95 :     the directory are named by genome. They contain the annotations for the genome.
96 :    
97 :     =cut
98 :    
99 :     =pod
100 :    
101 : parrello 1.61 =head3 what_code_do_I_have
102 : efrank 1.1
103 :     usage: &what_code_do_I_have($fig_base)
104 :    
105 :     This just returns the current version of the code.
106 :    
107 :     =cut
108 :    
109 :     sub what_code_do_I_have {
110 :     my($fig_base) = @_;
111 :    
112 : olson 1.34 my $version = &FIG::file_read("$fig_base/VERSION");
113 :     chomp $version;
114 : efrank 1.1 return $version;
115 :     }
116 :    
117 :     =pod
118 :    
119 : parrello 1.61 =head3 updatable_code
120 : efrank 1.1
121 :     usage: &updatable_code_code($v1,$v2)
122 :    
123 :     This just returns true iff the two versions of code are compatible and $v1
124 :     is "more recent".
125 :    
126 :     =cut
127 :    
128 :     sub updatable_code {
129 :     my($v1,$v2) = @_;
130 :     my($i,$v1p,$v1n,$v2p,$v2n);
131 :    
132 :     my @v1 = split(/\./,$v1);
133 :     my @v2 = split(/\./,$v2);
134 :     if (@v1 < @v2) { return 0 }
135 :    
136 :     for ($i=0; ($i < $#v2) && ($v1[$i] eq $v2[$i]); $i++) {}
137 :     if ($i == $#v2)
138 :     {
139 :     $v1[$i] =~ /^(.*[^\d])(\d+)$/;
140 :     $v1p = $1;
141 :     $v1n = $2;
142 :    
143 :     $v2[$i] =~ /^(.*[^\d])(\d+)$/;
144 :     $v2p = $1;
145 :     $v2n = $2;
146 :    
147 :     return (($v2p eq $v1p) && ($v2n < $v1n));
148 :     }
149 :     return 0;
150 :     }
151 :    
152 :     =pod
153 :    
154 : parrello 1.61 =head3 package_code
155 : efrank 1.1
156 :     usage: &package_code($fig_disk,$file)
157 :    
158 :     $fig_base must be an absolute filename (begins with "/") giving the FIG from which
159 :     the updated code release will be taken.
160 :    
161 :     $file must be an absolute filename where the "code package" will be built.
162 :    
163 :     =cut
164 :    
165 :     sub package_code {
166 :     my($fig_disk,$file) = @_;
167 :    
168 :     &force_absolute($fig_disk);
169 :     &force_absolute($file);
170 : olson 1.34 my @tmp = &FIG::file_head("$fig_disk/CURRENT_RELEASE", 1);
171 : overbeek 1.2 my $current_release = $tmp[0];
172 : olson 1.34 chomp $current_release;
173 : overbeek 1.2
174 :     &FIG::run("cd $fig_disk/dist/releases; tar czf $file $current_release");
175 : efrank 1.1 }
176 :    
177 :     sub force_absolute {
178 :     my($file) = @_;
179 :    
180 :     if (substr($file,0,1) ne "/")
181 :     {
182 : overbeek 1.2 print "Error: Please use absolute file names (i.e., /Users/fig/... or /home/fig/...)\n";
183 :     exit;
184 : efrank 1.1 }
185 :     }
186 :    
187 :     =pod
188 :    
189 : parrello 1.61 =head3 install_code
190 : efrank 1.1
191 :     usage: &install_code($fig_disk,$package)
192 :    
193 :     $fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.
194 :    
195 :     $package must be an absolute filename where the "code package" from which to make
196 :     the update exists.
197 :    
198 :     Note that this routine does not check that the updated code is compatible, or even less
199 :     current. It is assumed that upper level logic is doing that.
200 :    
201 :     =cut
202 :    
203 :     sub install_code {
204 :     my($fig_disk,$package) = @_;
205 :     my $fig_base = "$fig_disk/FIG";
206 :     &force_absolute($fig_base);
207 :     &force_absolute($package);
208 :    
209 :     if (getcwd() !~ /FIGdisk$/) { print die "Sorry, you must run this while in $FIG_Config::fig_disk" }
210 :    
211 :    
212 :     (! -d "$fig_disk/BackupCode") || &FIG::run("rm -rf $fig_disk/BackupCode");
213 :     mkdir("$fig_disk/BackupCode",0777) || die "Could not make the BackupCode directory";
214 :     (! -d "$fig_disk/BackupEnv") || &FIG::run("rm -rf $fig_disk/BackupEnv");
215 :     mkdir("$fig_disk/BackupEnv",0777) || die "Could not make the BackupEnv directory";
216 :    
217 :     my $version = &what_code_do_I_have($fig_base);
218 :     &FIG::run("cd $fig_disk; mv README install lib man env src $fig_disk/BackupEnv");
219 :     &FIG::run("cd $fig_base; mv VERSION Packages CGI $fig_disk/BackupCode");
220 :     print STDERR "made backups\n";
221 :    
222 :     &FIG::run("cd $fig_disk; tar xzf $package");
223 :     print STDERR "untarred new code\n";
224 :    
225 :     &fix_config("$fig_base/Packages/FIG_Config.pm","$fig_disk/BackupCode/Packages/FIG_Config.pm");
226 :     &FIG::run("cd $fig_base/bin; touch ToolTemplates/*/*; make all");
227 :     &FIG::run("cd $fig_base/CGI; touch ToolTemplates/*/*; make all");
228 :     print STDERR "installed new bin and CGI\n";
229 :    
230 :     &FIG::run("bring_system_up_to_date $version");
231 :     }
232 :    
233 :     =pod
234 :    
235 : parrello 1.61 =head3 package_lightweight_code
236 : efrank 1.1
237 :     usage: &package_lightweight_code($fig_disk,$file)
238 :    
239 :     $fig_base must be an absolute filename (begins with "/") giving the FIG from which
240 :     the updated code release will be taken.
241 :    
242 :     $file must be an absolute filename where the "code package" will be built.
243 :    
244 :     =cut
245 :    
246 :     sub package_lightweight_code {
247 :     my($fig_disk,$file) = @_;
248 :    
249 :     &force_absolute($fig_disk);
250 :     &force_absolute($file);
251 : olson 1.34 my @tmp = &FIG::file_head("$fig_disk/CURRENT_RELEASE", 1);
252 : overbeek 1.2 my $current_release = $tmp[0];
253 : olson 1.34 chomp $current_release;
254 : overbeek 1.2
255 :     &FIG::run("cd $fig_disk/dist/releases; tar czf $file $current_release");
256 : efrank 1.1 }
257 :    
258 :     =pod
259 :    
260 : parrello 1.61 =head3 install_lightweight_code
261 : efrank 1.1
262 :     usage: &install_lightweight_code($fig_disk,$package)
263 :    
264 :     $fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.
265 :    
266 :     $package must be an absolute filename where the "code package" from which to make
267 :     the update exists.
268 :    
269 :     Note that this routine does not check that the updated code is compatible, or even less
270 :     current. It is assumed that upper level logic is doing that.
271 :    
272 :     =cut
273 :    
274 :     sub install_lightweight_code {
275 :     my($fig_disk,$package) = @_;
276 :     my $fig_base = "$fig_disk/FIG";
277 :     &force_absolute($fig_base);
278 :     &force_absolute($package);
279 :    
280 : overbeek 1.2 if (! mkdir("$fig_disk/Tmp$$",0777))
281 :     {
282 :     print "Error: could not make $fig_disk/Tmp$$\n";
283 :     exit;
284 :     }
285 : efrank 1.1
286 : overbeek 1.2 &FIG::run("cd $fig_disk/Tmp$$; tar xzf $package");
287 :     if (! opendir(TMP,"$fig_disk/Tmp$$"))
288 :     {
289 :     print "Error: could not open $fig_disk/Tmp$$\n";
290 :     exit;
291 :     }
292 : efrank 1.1
293 : overbeek 1.2 my @rels = grep { $_ !~ /^\./ } readdir(TMP);
294 :     closedir(TMP);
295 :     if (@rels != 1)
296 :     {
297 :     print "Error: Bad code package: $package\n";
298 :     exit;
299 :     }
300 : efrank 1.1
301 : overbeek 1.2 my $new_release = $rels[0];
302 :     if (-d "$fig_disk/dist/releases/$new_release")
303 :     {
304 :     print "Error: $new_release already exists; we are doing nothing\n";
305 :     exit;
306 :     }
307 : efrank 1.1
308 : efrank 1.3 &FIG::run("mv $fig_disk/Tmp$$/$new_release $fig_disk/dist/releases");
309 :     &FIG::run("rm -rf $fig_disk/Tmp$$");
310 : olson 1.22
311 :     #
312 :     # Ugh. For now, find the arch in the fig config file $fig_disk/config/fig-user-env.sh"
313 :     #
314 :    
315 :     my $arch;
316 :     open(FH, "<$fig_disk/config/fig-user-env.sh");
317 :     while (<FH>)
318 :     {
319 :     if (/RTARCH="(.*)"/)
320 :     {
321 :     $arch = $1;
322 :     last;
323 :     }
324 :     }
325 :     close(FH);
326 :    
327 :     if ($arch eq "")
328 :     {
329 :     die "Couldn't determine SEED install architecture, not switching to release.";
330 :     }
331 :    
332 :     $ENV{RTARCH} = $arch;
333 : olson 1.32
334 :     #
335 :     # Need to put the ext_bin in the path.
336 :     #
337 :    
338 :     $ENV{PATH} .= ":$FIG_Config::ext_bin";
339 : olson 1.22
340 : efrank 1.9 &FIG::run("$FIG_Config::bin/switch_to_release $new_release");
341 : efrank 1.1 }
342 :    
343 :    
344 :     sub fix_config {
345 :     my($new,$old) = @_;
346 :     my($line,$i);
347 :    
348 : olson 1.34 my @new = &FIG::file_read($new);
349 :     foreach $line (&FIG::file_read($old))
350 : efrank 1.1 {
351 :     if ($line =~ /^(\S+)\s+\=/)
352 :     {
353 : olson 1.33 my $var = $1;
354 :     my $varQ = quotemeta $var;
355 : efrank 1.1
356 :     for ($i=0; ($i < $#new) && ($new[$i] !~ /^$varQ\s+\=/); $i++) {}
357 :     if ($i == $#new)
358 :     {
359 :     splice(@new,$i,0,$line);
360 :     }
361 :     else
362 :     {
363 :     splice(@new,$i,1,$line);
364 :     }
365 :     }
366 :     }
367 :     open(NEW,">$new") || confess "could not overwrite $new";
368 :     foreach $line (@new)
369 :     {
370 :     print NEW $line;
371 :     }
372 :     close(NEW);
373 :     }
374 :    
375 :     =pod
376 :    
377 : parrello 1.61 =head3 what_genomes_will_I_sync
378 : efrank 1.1
379 :     usage: &what_genomes_will_I_sync($fig_base,$who)
380 :    
381 :     This routine returns the list of genome IDs that you are willing to sync with $who.
382 :    
383 :     =cut
384 :    
385 :     sub what_genomes_will_I_sync {
386 :     my($fig_base,$who) = @_;
387 :    
388 :     # This is the promiscuous version - it will sync all genomes with anyone.
389 :    
390 :     opendir(GENOMES,"$fig_base/Data/Organisms") || die "could not open $fig_base/Data/Organisms";
391 :     my @genomes = grep { $_ =~ /^\d+\.\d+$/ } readdir(GENOMES);
392 :     closedir(GENOMES);
393 :     return @genomes;
394 :     }
395 :    
396 :     =pod
397 :    
398 : parrello 1.61 =head3 package_annotations
399 : efrank 1.1
400 : overbeek 1.29 usage: &package_annotations($fig,$genomes,$file)
401 : efrank 1.1
402 :     $genomes is a pointer to a list of genome IDs that will be exchanged.
403 :    
404 :     $file must be an absolute filename where the "annotation package" will be built.
405 :    
406 :     =cut
407 :    
408 :     sub package_annotations {
409 : olson 1.53 my($fig,$who,$date,$genomes,$file, %options) = @_;
410 : overbeek 1.29 my $fig_base = "$FIG_Config::fig_disk/FIG";
411 : efrank 1.1
412 : olson 1.52 if (!open(ANNOTATIONS,">$file"))
413 :     {
414 :     die "Cannot open annotations file $file for writing: $!";
415 :     }
416 :    
417 :    
418 :     my $annos = $fig->annotations_made_fast($genomes, $date, undef, $who);
419 :    
420 :     #
421 :     # $annos is a list of pairs [$genome, $genomeannos]
422 :     # $genomeannos is a hash keyed on peg. value is a list of lists [$peg, $time, $who, $anno].
423 :     #
424 :    
425 : olson 1.55 my @annos = sort { &FIG::by_genome_id($a->[0], $b->[0]) } @$annos;
426 : olson 1.52
427 :     for my $gent (@annos)
428 :     {
429 :     my($genome, $alist) = @$gent;
430 :    
431 :     for my $peg (sort { &FIG::by_fig_id($a, $b) } keys %$alist)
432 :     {
433 :     for my $aent (@{$alist->{$peg}})
434 :     {
435 : olson 1.55 print ANNOTATIONS $aent->as_text() . "\n///\n";
436 : olson 1.52 }
437 :     }
438 :    
439 :     }
440 : olson 1.53
441 :     print ANNOTATIONS "//\n";
442 :    
443 :     if (!$options{skip_aliases})
444 : olson 1.52 {
445 : olson 1.53 for my $gent (@annos)
446 : olson 1.52 {
447 : olson 1.53 my($genome, $alist) = @$gent;
448 :     my $gs = $fig->genus_species($genome);
449 :    
450 :     for my $peg (sort { &FIG::by_fig_id($a, $b) } keys %$alist)
451 : olson 1.52 {
452 : olson 1.53 my @aliases = grep { $_ =~ /^(sp\||gi\||pirnr\||kegg\||N[PGZ]_)/ } $fig->feature_aliases($peg);
453 :     print ANNOTATIONS join("\t",($peg,join(",",@aliases),$gs,scalar $fig->function_of($peg))) . "\n";
454 : olson 1.52 }
455 :     }
456 : olson 1.53 }
457 : olson 1.52
458 :     print ANNOTATIONS "//\n";
459 : olson 1.53 if (!$options{skip_sequences})
460 : olson 1.52 {
461 : olson 1.53 for my $gent (@annos)
462 : olson 1.52 {
463 : olson 1.53 my($genome, $alist) = @$gent;
464 :    
465 :     for my $peg (sort { &FIG::by_fig_id($a, $b) } keys %$alist)
466 :     {
467 :     my $seq = $fig->get_translation($peg);
468 :     &FIG::display_id_and_seq($peg,\$seq,\*ANNOTATIONS);
469 :     }
470 :    
471 : olson 1.52 }
472 :     }
473 :    
474 :     close(ANNOTATIONS);
475 :     }
476 :    
477 : olson 1.54 #
478 :     # This was the original version.
479 :     #
480 : olson 1.52 sub package_annotations2 {
481 :     my($fig,$who,$date,$genomes,$file) = @_;
482 :     my $fig_base = "$FIG_Config::fig_disk/FIG";
483 :    
484 : overbeek 1.29 if (open(ANNOTATIONS,">$file"))
485 : efrank 1.1 {
486 : overbeek 1.29 my @annotations = sort { $a->[0] cmp $b->[0] } $fig->annotations_made($genomes,$who,$date);
487 : olson 1.33 foreach my $x (@annotations)
488 : efrank 1.1 {
489 : overbeek 1.45 my $ann = join("\n",@$x);
490 :     if (($ann =~ /^fig\|\d+\.\d+\.peg\.\d+\n\d+\n/s) && ($ann !~ /\n\/\/\n/s))
491 :     {
492 :     print ANNOTATIONS join("\n",@$x),"\n///\n";
493 :     }
494 : efrank 1.1 }
495 : overbeek 1.29 print ANNOTATIONS "//\n";
496 : efrank 1.15
497 : olson 1.33 foreach my $x (@annotations)
498 : efrank 1.15 {
499 : olson 1.33 my $peg = $x->[0];
500 : overbeek 1.30 my @aliases = grep { $_ =~ /^(sp\||gi\||pirnr\||kegg\||N[PGZ]_)/ } $fig->feature_aliases($peg);
501 :     print ANNOTATIONS join("\t",($peg,join(",",@aliases),$fig->genus_species($fig->genome_of($peg)),scalar $fig->function_of($peg))) . "\n";
502 : efrank 1.15 }
503 : overbeek 1.29 print ANNOTATIONS "//\n";
504 :    
505 : olson 1.33 foreach my $x (@annotations)
506 : efrank 1.15 {
507 : olson 1.33 my $peg;
508 : overbeek 1.29 ($peg,undef) = @$x;
509 : overbeek 1.30 my $seq = $fig->get_translation($peg);
510 : overbeek 1.29 &FIG::display_id_and_seq($peg,\$seq,\*ANNOTATIONS);
511 : efrank 1.15 }
512 : overbeek 1.29 close(ANNOTATIONS);
513 : efrank 1.15 }
514 : efrank 1.1 }
515 :    
516 : overbeek 1.29
517 : efrank 1.1 =pod
518 :    
519 : parrello 1.61 =head3 install_annotations
520 : efrank 1.1
521 :     usage: &install_annotations($fig_disk,$package)
522 :    
523 :     $fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.
524 :    
525 :     $package must be an absolute filename where the "annotations package" from which to make
526 :     the update exists.
527 :    
528 :     =cut
529 :    
530 :     sub install_annotations {
531 : overbeek 1.29 my($fig,$package) = @_;
532 :     my($user,$who,$date,$userR,@assignments,$peg,$aliases,$org,$func);
533 :     my(%pegs,%seq_of,@seq,$peg_to,$trans_pegs,$seq,$line,@ann,$ann);
534 : efrank 1.1 my($genome);
535 :    
536 : overbeek 1.29 my $fig_disk = $FIG_Config::fig_disk;
537 :     open(IN,"<$package") || die "could not open $package";
538 :     $/ = "\n//\n";
539 :     if (defined($line = <IN>))
540 :     {
541 : olson 1.33 my(@annotations);
542 :    
543 : overbeek 1.29 $line =~ s/\n\/\/\n/\n/s;
544 :     @ann = split(/\n\/\/\/\n/,$line);
545 :     foreach $ann (@ann)
546 :     {
547 :     if ($ann =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\n(.*)/s)
548 :     {
549 :     push(@annotations,[$1,$2,$3,$4]);
550 :     }
551 :     }
552 :     $/ = "\n";
553 :     while ($line && defined($line = <IN>) && ($line !~ /^\/\//))
554 :     {
555 : olson 1.34 chomp $line;
556 : overbeek 1.29 ($peg,$aliases,$org,$func) = split(/\t/,$line);
557 :     $pegs{$peg} = [$aliases,$org,$func];
558 :     }
559 :    
560 :     if ($line) { $line = <IN> }
561 :     while (defined($line) && ($line !~ /^\/\//))
562 :     {
563 :     if ($line =~ /^>(\S+)/)
564 :     {
565 :     $peg = $1;
566 :     @seq = ();
567 : overbeek 1.44 $line = <IN>;
568 :     while ($line && ($line !~ /^[>\/]/) && ($line !~ /^\/\//))
569 : overbeek 1.29 {
570 :     push(@seq,$line);
571 :     $line = <IN>;
572 :     }
573 :     $seq = join("",@seq);
574 :     $seq =~ s/[ \n\t]//gs;
575 :     $seq_of{$peg} = uc $seq;
576 :     }
577 : overbeek 1.46 else
578 :     {
579 :     $line = <IN>;
580 :     }
581 : overbeek 1.29 }
582 :     close(IN);
583 :     $trans_pegs = $fig->translate_pegs(\%pegs,\%seq_of);
584 :     @annotations = sort { ($a->[0] cmp $b->[0]) or ($a->[1] <=> $b->[1]) }
585 :     map { ($peg = $trans_pegs->{$_->[0]}) ? [$peg,$_->[1],$_->[2],$_->[3]] : () }
586 :     @annotations;
587 :    
588 :     if (-d "$fig_disk/BackupAnnotations") { system "rm -rf $fig_disk/BackupAnnotations" }
589 :     mkdir("$fig_disk/BackupAnnotations",0777);
590 :     mkdir("$fig_disk/BackupAnnotations/New",0777);
591 : olson 1.33 my $i;
592 :     for ($i=0; ($i < @annotations); $i++)
593 : overbeek 1.29 {
594 :     if (($i == 0) || ($fig->genome_of($annotations[$i]->[0]) ne $fig->genome_of($annotations[$i-1]->[0])))
595 :     {
596 :     if ($i != 0)
597 :     {
598 :     close(OUT);
599 :     }
600 :     $genome = $fig->genome_of($annotations[$i]->[0]);
601 :     open(OUT,">$fig_disk/BackupAnnotations/New/$genome")
602 :     || die "could not open $fig_disk/BackupAnnotations/New/$genome";
603 :     }
604 :     print OUT join("\n",@{$annotations[$i]}),"\n//\n";
605 :     }
606 :     if ($i > 0) { close(OUT) }
607 :     }
608 : efrank 1.15
609 : efrank 1.1 opendir(TMP,"$fig_disk/BackupAnnotations/New") || die "could not open $fig_disk/BackupAnnotations/New";
610 :     my @genomes = grep { $_ =~ /^\d+\.\d+$/ } readdir(TMP);
611 :     closedir(TMP);
612 :     foreach $genome (@genomes)
613 :     {
614 : overbeek 1.43 next if (! -d "$fig_disk/FIG/Data/Organisms/$genome");
615 : efrank 1.13
616 : efrank 1.1 print STDERR "installing $fig_disk/FIG/Data/Organisms/$genome/annotations\n";
617 :     if (-s "$fig_disk/FIG/Data/Organisms/$genome/annotations")
618 :     {
619 :     &FIG::run("cp -p $fig_disk/FIG/Data/Organisms/$genome/annotations $fig_disk/BackupAnnotations/$genome");
620 : efrank 1.9 &FIG::run("$FIG_Config::bin/merge_annotations $fig_disk/BackupAnnotations/$genome $fig_disk/BackupAnnotations/New/$genome > $fig_disk/FIG/Data/Organisms/$genome/annotations");
621 : efrank 1.1 }
622 :     else
623 :     {
624 :     &FIG::run("cp $fig_disk/BackupAnnotations/New/$genome $fig_disk/FIG/Data/Organisms/$genome/annotations");
625 :     }
626 : olson 1.37 chmod 0777,"$fig_disk/FIG/Data/Organisms/$genome/annotations";
627 : efrank 1.1 }
628 : efrank 1.9 &FIG::run("$FIG_Config::bin/index_annotations");
629 : efrank 1.1 }
630 : olson 1.47
631 :    
632 :     =pod
633 :    
634 : parrello 1.61 =head3 install_annotations_gff
635 : olson 1.47
636 :     Install a set of annotations contained in a GFF3 file package.
637 :    
638 :     We parse using the FigGFF::GFFParser GFF parser. This returns a GFFFile object
639 :     that contains the parsed contents of the file.
640 :    
641 :     =cut
642 :    
643 :     sub install_annotations_gff
644 :     {
645 :     my($fig, $gff_file) = @_;
646 :    
647 :     my $db = $fig->db_handle;
648 :    
649 :     my $parser = new GFFParser($fig);
650 :    
651 :     my $fobj = $parser->parse($gff_file);
652 :    
653 :     #
654 :     # We assume that we only have one genome per GFF file, but we
655 :     # get the list of genomes and checksums via a general accessor anyway.
656 :     #
657 :    
658 :     for my $ent (@{$fobj->genome_checksums()})
659 :     {
660 :     my($genome, $checksum) = @$ent;
661 :    
662 :     #
663 :     # Determine if we have the same version of this genome.
664 :     #
665 :    
666 :     my $local_genome = $fig->genome_with_md5sum($checksum);
667 :     print "Local genome=$local_genome cksum=$checksum\n";
668 :    
669 :     #
670 :     # Walk the features, looking for matching features in the local SEED,
671 :     # and install the annotations if possible.
672 :     #
673 :    
674 :     my @annos;
675 :    
676 :     print "Walking $genome\n";
677 :     for my $feature (@{$fobj->features_for_genome($genome)})
678 :     {
679 :     my($local_id);
680 :    
681 :     my @local_ids = $feature->find_local_feature($local_genome);
682 :    
683 :     print "Mapped to @local_ids\n";
684 :     }
685 :     }
686 :     }
687 :    
688 : efrank 1.1
689 :     =pod
690 :    
691 : parrello 1.61 =head3 restore_annotations
692 : efrank 1.1
693 :     usage: &restore_annotations($fig_disk);
694 :    
695 :     $fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.
696 :    
697 :     =cut
698 :    
699 :     sub restore_annotations {
700 :     my($fig_disk) = @_;
701 :    
702 :     &force_absolute($fig_disk);
703 :     (-d "$fig_disk/BackupAnnotations") || die "could not find an active backup";
704 :     opendir(TMP,"$fig_disk/BackupAnnotations") || die "could not open $fig_disk/BackupAnnotations";
705 :     my @genomes = grep { $_ =~ /^\d+\.\d+$/ } readdir(TMP);
706 :     closedir(TMP);
707 : olson 1.33 foreach my $genome (@genomes)
708 : efrank 1.1 {
709 :     unlink("$fig_disk/FIG/Data/Organisms/$genome/annotations");
710 :     &FIG::run("cp $fig_disk/BackupAnnotations/$genome $fig_disk/FIG/Data/Organisms/$genome/annotations");
711 : olson 1.37 system "chmod 777 $fig_disk/FIG/Data/Organisms/$genome/annotations";
712 : efrank 1.1 }
713 : efrank 1.9 &FIG::run("$FIG_Config::bin/index_annotations");
714 : efrank 1.1 }
715 :    
716 :     =pod
717 :    
718 : parrello 1.61 =head3 package_aassignments
719 : efrank 1.1
720 : overbeek 1.20 usage: package_assignments($fig,$user,$who,$date,$genomes,$file)
721 : efrank 1.1
722 : overbeek 1.20 $user designates the user wishing to get the assignments
723 : efrank 1.1
724 : overbeek 1.5 $who designates whose assignments you want (defaults to "master")
725 :    
726 :     $date if given indicates a point in time (get assignments after that point)
727 :    
728 : efrank 1.1 $genomes is a pointer to a list of genome IDs that will be exchanged.
729 :    
730 :     $file must be an absolute filename where the "assignment package" will be built.
731 :    
732 :     =cut
733 :    
734 :     sub package_assignments {
735 : overbeek 1.20 my($fig,$user,$who,$date,$genomes,$file) = @_;
736 : overbeek 1.27 my($genome,$x,$org,$curr,$peg);
737 : overbeek 1.4 $who = $who ? $who : "master";
738 : overbeek 1.7 $date = $date ? $date : 0;
739 : overbeek 1.27
740 : overbeek 1.20 if (open(ASSIGNMENTS,">$file"))
741 : efrank 1.1 {
742 : overbeek 1.20 print ASSIGNMENTS "$user\t$who\t$date\n";
743 : olson 1.33 my @assignments = sort { $a->[0] cmp $b->[0] } $fig->assignments_made_full($genomes,$who,$date);
744 : overbeek 1.36 my @curr_assignments = ();
745 : overbeek 1.27 foreach $x (@assignments)
746 : efrank 1.1 {
747 : overbeek 1.36 my($peg, $function, undef, undef) = @$x;
748 :     if ($function eq $fig->function_of($peg,$who))
749 :     {
750 :     print ASSIGNMENTS join("\t", $peg, $function),"\n";
751 :     push(@curr_assignments,$x);
752 :     }
753 : overbeek 1.4 }
754 : overbeek 1.27 print ASSIGNMENTS "//\n";
755 :    
756 : overbeek 1.36 foreach $x (@curr_assignments)
757 : overbeek 1.27 {
758 :     ($peg,undef) = @$x;
759 : overbeek 1.30 my @aliases = grep { $_ =~ /^(sp\||gi\||pirnr\||kegg\||N[PGZ]_)/ } $fig->feature_aliases($peg);
760 : olson 1.33
761 :     my $alias_txt = join(",",@aliases);
762 :     my $gs_txt = $fig->genus_species($fig->genome_of($peg));
763 :     my $func_txt = scalar $fig->function_of($peg);
764 :    
765 :     print ASSIGNMENTS join("\t",($peg,
766 :     $alias_txt,
767 :     $gs_txt,
768 :     $func_txt)) . "\n";
769 : overbeek 1.28 }
770 :     print ASSIGNMENTS "//\n";
771 :    
772 : overbeek 1.36 foreach $x (@curr_assignments)
773 : overbeek 1.28 {
774 :     ($peg,undef) = @$x;
775 : overbeek 1.30 my $seq = $fig->get_translation($peg);
776 : overbeek 1.27 &FIG::display_id_and_seq($peg,\$seq,\*ASSIGNMENTS);
777 :     }
778 :     close(ASSIGNMENTS);
779 : efrank 1.1 }
780 :     }
781 :    
782 :     =pod
783 :    
784 : parrello 1.61 =head3 install_assignments
785 : efrank 1.1
786 : overbeek 1.20 usage: &install_assignments($package)
787 : efrank 1.1
788 : overbeek 1.20 $package must be a filename where the "assignments package" from which to make
789 :     the assignment set exists
790 : efrank 1.1
791 :     =cut
792 :    
793 :     sub install_assignments {
794 : overbeek 1.35 my($fig,$package,$make_assignments) = @_;
795 : overbeek 1.29 my($user,$who,$date,$userR,@assignments,$peg,$aliases,$org,$func);
796 :     my(%pegs,%seq_of,@seq,$peg_to,$trans_pegs,$seq);
797 : efrank 1.1
798 : overbeek 1.20 open(IN,"<$package") || die "could not open $package";
799 :     my $line = <IN>;
800 : olson 1.34 chomp $line;
801 : overbeek 1.20 ($user,$who,$date) = split(/\t/,$line);
802 : olson 1.26 $userR = $user;
803 :     $userR =~ s/^master://;
804 : overbeek 1.29
805 : overbeek 1.30 while (defined($line = <IN>) && ($line !~ /^\/\//))
806 : overbeek 1.29 {
807 :     if ($line =~ /^(fig\|\d+\.\d+\.peg\.\d+)\t(\S.*\S)/)
808 :     {
809 :     push(@assignments,[$1,$2]);
810 :     }
811 :     }
812 :     while ($line && defined($line = <IN>) && ($line !~ /^\/\//))
813 :     {
814 : olson 1.34 chomp $line;
815 : overbeek 1.29 ($peg,$aliases,$org,$func) = split(/\t/,$line);
816 :     $pegs{$peg} = [$aliases,$org,$func];
817 :     }
818 :    
819 :     if ($line) { $line = <IN> }
820 :     while (defined($line) && ($line !~ /^\/\//))
821 :     {
822 :     if ($line =~ /^>(\S+)/)
823 :     {
824 :     $peg = $1;
825 :     @seq = ();
826 : overbeek 1.44 $line = <IN>;
827 :     while ($line && ($line !~ /^[>\/]/) && ($line !~ /^\/\//))
828 : overbeek 1.29 {
829 :     push(@seq,$line);
830 :     $line = <IN>;
831 :     }
832 :     $seq = join("",@seq);
833 :     $seq =~ s/[ \n\t]//gs;
834 :     $seq_of{$peg} = uc $seq;
835 :     }
836 : overbeek 1.46 else
837 :     {
838 :     $line = <IN>;
839 : olson 1.47 }
840 : overbeek 1.29 }
841 :     close(IN);
842 :     $trans_pegs = $fig->translate_pegs(\%pegs,\%seq_of);
843 :    
844 : olson 1.26 &FIG::verify_dir("$FIG_Config::data/Assignments/$userR");
845 : overbeek 1.29 my $file = &FIG::epoch_to_readable($date) . ":$who:imported";
846 : overbeek 1.20 $file =~ s/\//-/g;
847 : overbeek 1.35
848 :     if (! $make_assignments)
849 :     {
850 :     open(OUT,">$FIG_Config::data/Assignments/$userR/$file")
851 :     || die "could not open $FIG_Config::data/Assignments/$userR/$file";
852 :     }
853 : overbeek 1.29
854 :     foreach $peg (keys(%$trans_pegs))
855 : overbeek 1.20 {
856 : overbeek 1.29 $peg_to = $trans_pegs->{$peg};
857 :     $func = $pegs{$peg}->[2];
858 :     if ($fig->function_of($peg_to) ne $func)
859 :     {
860 : overbeek 1.35 if ($make_assignments)
861 :     {
862 :     if ($user =~ /master:(.*)/)
863 :     {
864 :     $userR = $1;
865 :     $fig->assign_function($peg_to,"master",$func,"");
866 : golsen 1.62 # Now in assign_function
867 :     # if ($userR ne "none")
868 :     # {
869 :     # $fig->add_annotation($peg_to,$userR,"Set master function to\n$func\n");
870 :     # }
871 : overbeek 1.35 }
872 :     else
873 :     {
874 :     $fig->assign_function($peg_to,$user,$func,"");
875 : golsen 1.62 # Now in assign_function
876 :     # if ($user ne "none")
877 :     # {
878 :     # $fig->add_annotation($peg_to,$user,"Set function to\n$func\n");
879 :     # }
880 : overbeek 1.35 }
881 :     }
882 :     else
883 :     {
884 :     print OUT "$peg_to\t$func\n";
885 :     }
886 : overbeek 1.29 }
887 : overbeek 1.20 }
888 : overbeek 1.35 if (! $make_assignments)
889 :     {
890 :     close(OUT);
891 :     if (! -s "$FIG_Config::data/Assignments/$userR/$file") { unlink("$FIG_Config::data/Assignments/$userR/$file") }
892 :     }
893 : efrank 1.1 }
894 :    
895 :     =pod
896 :    
897 : parrello 1.61 =head3 package_translation_rules
898 : efrank 1.1
899 :     usage: &package_translation_rules($fig_base,$file)
900 :    
901 :     $fig_base must be an absolute filename (begins with "/") giving the FIG from which
902 :     the updated code release will be taken.
903 :    
904 :     $file must be an absolute filename where the "translation_rules package" will be built.
905 :    
906 :     =cut
907 :    
908 :     sub package_translation_rules {
909 :     my($fig_base,$file) = @_;
910 :    
911 :     &FIG::run("cp $fig_base/Data/Global/function.synonyms $file");
912 :     }
913 :    
914 :     =pod
915 :    
916 : parrello 1.61 =head3 install_translation_rules
917 : efrank 1.1
918 : efrank 1.10 usage: &install_translation_rules($fig_disk,$from,$package)
919 : efrank 1.1
920 :     $fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.
921 :    
922 :     $package must be an absolute filename where the "translation_rules package" from which to make
923 :     the update exists.
924 :    
925 :     =cut
926 :    
927 :     sub install_translation_rules {
928 : efrank 1.10 my($fig_disk,$from,$package) = @_;
929 : efrank 1.1
930 :     my $file = "$fig_disk/FIG/Data/Global/function.synonyms";
931 :     &force_absolute($fig_disk);
932 :     if (-d "$fig_disk/BackupTranslation_Rules") { system "rm -rf $fig_disk/BackupTranslation_Rules" }
933 :     mkdir("$fig_disk/BackupTranslation_Rules",0777);
934 : disz 1.31 chmod 02777,"$fig_disk/BackupTranslation_Rules";
935 : efrank 1.1 if (-s $file)
936 :     {
937 :     &FIG::run("cp $file $fig_disk/BackupTranslation_Rules");
938 :     }
939 : efrank 1.10 &FIG::run("$FIG_Config::bin/merge_translation_rules $fig_disk/BackupTranslation_Rules/function.synonyms $package $from > $file");
940 : disz 1.31 chmod 02777,$file;
941 : efrank 1.1 }
942 :    
943 :     =pod
944 :    
945 : parrello 1.61 =head3 restore_translation_rules
946 : efrank 1.1
947 :     usage: &restore_translation_rules($fig_disk);
948 :    
949 :     $fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.
950 :    
951 :     =cut
952 :    
953 :     sub restore_translation_rules {
954 :     my($fig_disk) = @_;
955 :    
956 :     &force_absolute($fig_disk);
957 :    
958 :     my $file = "$fig_disk/FIG/Data/Global/function.synonyms";
959 :     (-s "$fig_disk/BackupTranslation_Rules/function.synonyms") || die "could not find an active backup";
960 :     if (-s "$fig_disk/BackupTranslation_Rules/function.synonyms")
961 :     {
962 :     &FIG::run("cp $fig_disk/BackupTranslation_Rules/function.synonyms $file");
963 : olson 1.37 chmod 0777, $file;
964 : efrank 1.1 }
965 :     }
966 :    
967 : overbeek 1.23 sub package_subsystems {
968 : overbeek 1.57 my($fig,$file,$just_exchangable,$just_these) = @_;
969 :     my($ssa,@exchangable);
970 : overbeek 1.27
971 : overbeek 1.57 if (@$just_these > 0)
972 :     {
973 :     @exchangable = @$just_these;
974 :     }
975 :     else
976 :     {
977 :     $just_exchangable = defined($just_exchangable) ? $just_exchangable : 1;
978 :     @exchangable = grep { (! $just_exchangable) || $fig->is_exchangable_subsystem($_) }
979 :     $fig->all_subsystems;
980 :     }
981 : overbeek 1.23 my $fig = new FIG;
982 : overbeek 1.24 if ((@exchangable > 0) && open(SUB,">$file"))
983 : overbeek 1.23 {
984 : overbeek 1.24 foreach $ssa (@exchangable)
985 : overbeek 1.23 {
986 : overbeek 1.25 # print STDERR "writing $ssa to $file\n";
987 :     my($spreadsheet,$notes) = $fig->exportable_subsystem($ssa);
988 : overbeek 1.23 print SUB join("",@$spreadsheet),join("",@$notes),"########################\n";
989 :     }
990 :     close(SUB);
991 :     }
992 : overbeek 1.25 else
993 :     {
994 :     # print STDERR &Dumper(\@exchangable,$file);
995 :     }
996 : overbeek 1.23 }
997 :    
998 : overbeek 1.30 sub install_subsystems {
999 :     my($fig,$package) = @_;
1000 :    
1001 : disz 1.31 &FIG::run("$FIG_Config::bin/import_subsystems master last_release < $package");
1002 : overbeek 1.30 }
1003 :    
1004 : olson 1.37
1005 :     =pod
1006 :    
1007 :     =head2 unpack_packaged_subsystem
1008 :    
1009 :     Unpack a packaged subsystem (from the clearinghouse or a p2p transfer)
1010 :     into a directory; this will create a directory named as the subsystem
1011 :     and formatted like the standard subsystem directories, as well as a
1012 :     file of assignments and a file of sequences in fasta format.
1013 :    
1014 :     Returns the name of the subsystem.
1015 :    
1016 :     =cut
1017 :    
1018 :     sub unpack_packaged_subsystem
1019 :     {
1020 :     my($fig, $file, $target_dir) = @_;
1021 :    
1022 :     my $user = $fig->get_user();
1023 :    
1024 :     &FIG::verify_dir($target_dir);
1025 :    
1026 :     my $fh;
1027 :    
1028 :     if (!open($fh, "<$file"))
1029 :     {
1030 :     warn "unpack_packaged_subsystem: cannot open $file: $!";
1031 :     return undef;
1032 :     }
1033 :    
1034 :     #
1035 :     # We scan the file, breaking it up into sections and writing
1036 :     # to the appropriate places.
1037 :     #
1038 :     # First the header.
1039 :     #
1040 :    
1041 :     local $/ = "\n//\n";
1042 :    
1043 :     my $header = <$fh>;
1044 :     chomp $header;
1045 :    
1046 :     my ($name, $version, $exchangable, $curation) = split(/\n/, $header);
1047 :    
1048 :     print "Importing name=$name version=$version exch=$exchangable curation='$curation'\n";
1049 :    
1050 :     #
1051 :     # Pull in roles, subsets, and spreadsheet. These will be written to the new
1052 :     # spreadsheet file.
1053 :     #
1054 :    
1055 :     my $roles = <$fh>;
1056 :     chomp $roles;
1057 :    
1058 :     my $subsets = <$fh>;
1059 :     chomp $subsets;
1060 :    
1061 :     my $spreadsheet = <$fh>;
1062 :     chomp $spreadsheet;
1063 :    
1064 :    
1065 :     #
1066 :     # Pull the assignments and sequences. These go to their own files.
1067 :     #
1068 :    
1069 :     my $assignments = <$fh>;
1070 :     chomp $assignments;
1071 :    
1072 :     my $sequences = <$fh>;
1073 :     chomp $sequences;
1074 :    
1075 :     #
1076 :     # And the notes; these will be written to the subsystem dir.
1077 :     #
1078 :    
1079 :     my $notes = <$fh>;
1080 :     chomp $notes;
1081 :    
1082 :     close($fh);
1083 :    
1084 :     #
1085 :     # Everything is read. Now to write it all back out again.
1086 :     #
1087 :    
1088 :     #
1089 :     # First the subsystem.
1090 :     #
1091 :    
1092 :     my $ss_path = "$target_dir/subsystem";
1093 :     &FIG::verify_dir($ss_path);
1094 :    
1095 :     open($fh, ">$ss_path/EXCHANGABLE");
1096 :     print $fh "$exchangable\n";
1097 :     close($fh);
1098 :    
1099 :     open($fh, ">$ss_path/VERSION");
1100 :     print $fh "$version\n";
1101 :     close($fh);
1102 :    
1103 :     open($fh, ">$ss_path/curation.log");
1104 :     print $fh "$curation\n";
1105 :     my $now = time;
1106 :     print $fh "$now\t$user\timported\n";
1107 :     close($fh);
1108 :    
1109 :     open($fh, ">$ss_path/notes");
1110 :     print $fh "$notes\n";
1111 :     close($fh);
1112 :    
1113 :     open($fh, ">$ss_path/spreadsheet");
1114 :     print $fh "$roles\n";
1115 :     print $fh "//\n";
1116 :     print $fh "$subsets\n";
1117 :     print $fh "//\n";
1118 :     print $fh "$spreadsheet\n";
1119 :     close($fh);
1120 :    
1121 :     open($fh, ">$target_dir/subsystem_name");
1122 :     print $fh "$name\n";
1123 :     close($fh);
1124 :    
1125 :     open($fh, ">$target_dir/assignments");
1126 :     print $fh "$assignments\n";
1127 :     close($fh);
1128 :    
1129 :     open($fh, ">$target_dir/seqs.fasta");
1130 :     print $fh "$sequences\n";
1131 :     close($fh);
1132 :    
1133 :     return $name;
1134 :     }
1135 :    
1136 : olson 1.33 package SubsystemFile;
1137 :    
1138 :     use Data::Dumper;
1139 :     use strict;
1140 : olson 1.50 use Carp;
1141 :     use MIME::Base64;
1142 :    
1143 : olson 1.33 sub new
1144 :     {
1145 :     my($class, $qdir, $file, $fig) = @_;
1146 :     my(@info);
1147 :    
1148 : olson 1.50 my $use_cache = defined($qdir);
1149 :    
1150 : olson 1.33 @info = FIG::file_head($file, 4);
1151 :     if (!@info)
1152 :     {
1153 :     warn "Cannot open $file\n";
1154 :     return undef;
1155 :     }
1156 :    
1157 :     chomp(@info);
1158 :    
1159 :     my $name = $info[0];
1160 :     my $version = $info[1];
1161 :     my $exc = $info[2];
1162 :    
1163 :     my @c = split(/\t/, $info[3]);
1164 :    
1165 :     my $curator = $c[1];
1166 :    
1167 :     my $self = {
1168 :     qdir => $qdir,
1169 : olson 1.50 use_cache => $use_cache,
1170 : olson 1.33 file => $file,
1171 :     name => $name,
1172 :     version => $version,
1173 :     exchangable => $exc,
1174 :     curator => $curator,
1175 : olson 1.39 curation_log => $info[3],
1176 : olson 1.33 fig => $fig,
1177 :     };
1178 :    
1179 :     return bless($self, $class);
1180 :    
1181 :     }
1182 :    
1183 :     #
1184 :     # Load the export file into internal data structures.
1185 :     #
1186 :     # It's structured as
1187 :     #
1188 :     # name
1189 :     # version
1190 :     # exchangable
1191 :     # creation date <tab> curator <tab> "started"
1192 :     # //
1193 :     # roles
1194 :     # //
1195 :     # subsets
1196 :     # //
1197 :     # spreadsheet
1198 :     # //
1199 :     # assignments
1200 :     # //
1201 :     # sequences
1202 :     # //
1203 :     # notes
1204 :     # //
1205 : olson 1.49 # reactions
1206 : olson 1.33 #
1207 :     # Subsections:
1208 :     #
1209 :     # roles:
1210 :     #
1211 :     # abbr <tab> role-name
1212 :     #
1213 :     # subsets has meaning to the acutal subsystems, but we'll use it as a string.
1214 :     #
1215 :     # spreadsheet:
1216 :     #
1217 :     # genome <tab> variant <tab> items
1218 :     #
1219 :     # Where items is tab-separated columns, each of which is comma-separated peg number in the genome
1220 :     #
1221 :     # assignments:
1222 :     #
1223 :     # fid <tab> aliases <tab> organism <tab> function
1224 :     #
1225 :     # sequences:
1226 :     #
1227 :     # list of fasta's
1228 :     #
1229 :     # notes:
1230 :     #
1231 :     # plain text
1232 :     #
1233 :     sub load
1234 :     {
1235 :     my($self) = @_;
1236 :    
1237 :     my $fig = $self->{fig};
1238 :    
1239 :     my($fh);
1240 :    
1241 :     open($fh, "<$self->{file}") or die "Cannot open $self->{file}: $!\n";
1242 :    
1243 :     #
1244 : olson 1.39 # Skip intro section - we already read this information in the constructor.
1245 : olson 1.33 #
1246 :    
1247 :     while (<$fh>)
1248 :     {
1249 :     chomp;
1250 :     last if m,^//,;
1251 :     }
1252 :    
1253 :     #
1254 :     # Read the roles.
1255 :     #
1256 :    
1257 :    
1258 :     my $nroles;
1259 :    
1260 :     while (<$fh>)
1261 :     {
1262 :     last if m,^//,;
1263 :    
1264 :     $self->{role_text} .= $_;
1265 :     chomp $_;
1266 :    
1267 :     my($abbr, $role) = split(/\t/);
1268 :    
1269 :     warn "Have role $role\n";
1270 :    
1271 :     push(@{$self->{roles}}, $role);
1272 :     push(@{$self->{abbrs}}, $abbr);
1273 :    
1274 :     $nroles++;
1275 :     }
1276 :    
1277 :     #
1278 :     # Read in subsets as a string.
1279 :     #
1280 :    
1281 :     while (<$fh>)
1282 :     {
1283 :     last if m,^//,;
1284 :    
1285 :     $self->{subsets_text} .= $_;
1286 :     }
1287 :    
1288 :     #
1289 :     # Read the spreadsheet.
1290 :     #
1291 :    
1292 :     while (<$fh>)
1293 :     {
1294 :     last if m,^//,;
1295 :    
1296 :     $self->{spreadsheet_text} .= $_;
1297 :    
1298 :     chomp;
1299 :    
1300 :     my($genome, $variant, @items) = split(/\t/, $_, $nroles + 2);
1301 :    
1302 :     push(@{$self->{genomes}}, $genome);
1303 :    
1304 :     my $gobj = GenomeObj->new($self, $fig, $genome, $variant, [@items]);
1305 :    
1306 :     $self->{genome_objs}->{$genome} = $gobj;
1307 :     }
1308 :    
1309 :     #
1310 :     # Read PEG info
1311 :     #
1312 :    
1313 :     while (<$fh>)
1314 :     {
1315 :     last if m,^//,;
1316 :    
1317 :     chomp;
1318 :    
1319 :     my ($peg, $aliases, $org, $func) = split(/\t/);
1320 :    
1321 :     push(@{$self->{pegs}}, [$peg, $aliases, $org, $func]);
1322 :     }
1323 :    
1324 :     #
1325 :     # Read sequence info
1326 :     #
1327 :    
1328 :     my($cur, $cur_peg);
1329 :    
1330 :     while (<$fh>)
1331 :     {
1332 :     if (/^>(fig\|\d+\.\d+\.peg\.\d+)/)
1333 :     {
1334 :     if ($cur)
1335 :     {
1336 :     $cur =~ s/\s+//gs;
1337 :     $self->{peg_seq}->{$cur_peg} = $cur;
1338 :     }
1339 :     $cur_peg = $1;
1340 :     $cur = '';
1341 :     }
1342 :     elsif (m,^//,)
1343 :     {
1344 :     $cur =~ s/\s+//gs;
1345 :     $self->{peg_seq}->{$cur_peg} = $cur;
1346 :     last;
1347 :     }
1348 :     else
1349 :     {
1350 :     $cur .= $_;
1351 :     }
1352 :     }
1353 :    
1354 :     #
1355 :     # Read notes as a string
1356 :     #
1357 :    
1358 :     while (<$fh>)
1359 :     {
1360 :     last if m,^//,;
1361 :    
1362 :     $self->{notes_txt} .= $_;
1363 :     }
1364 : olson 1.49
1365 :     #
1366 :     # Anything left here is reaction data.
1367 :     #
1368 :    
1369 :     my $reactions;
1370 :    
1371 :     while (<$fh>)
1372 :     {
1373 :     last if m,^//,;
1374 :    
1375 :     if (/^([^\t]+)\t([^\t]+)/)
1376 :     {
1377 :     $reactions .= $_;
1378 :     }
1379 :     }
1380 :    
1381 :     $self->{reactions} = $reactions if $reactions ne "";
1382 :    
1383 : olson 1.50 #
1384 :     # Additional sections. If $_ is //<something>, go ahead and process the blocks.
1385 :     #
1386 :     #
1387 :    
1388 :     my @blocks = ();
1389 :    
1390 :     if (m,^//(.*)$,)
1391 :     {
1392 :     chomp;
1393 : olson 1.51 my $cur_block;
1394 : olson 1.50 my $cur_tag = $1;
1395 :     while (<$fh>)
1396 :     {
1397 :     if (m,^//end$,)
1398 :     {
1399 : olson 1.51 push(@blocks, [$cur_tag, $cur_block]);
1400 : olson 1.50 }
1401 :     elsif (m,^//(.*)$,)
1402 :     {
1403 :     chomp;
1404 : olson 1.51 $cur_block = [];
1405 : olson 1.50 $cur_tag = $1;
1406 :     }
1407 :     else
1408 :     {
1409 : olson 1.51 push(@$cur_block, $_);
1410 : olson 1.50 }
1411 :     }
1412 :     }
1413 :     $self->{blocks} = \@blocks;
1414 : olson 1.33 }
1415 :    
1416 :     #
1417 : olson 1.39 # Compute or load from cache the PEG translations for this subsystem.
1418 :     #
1419 :     sub ensure_peg_translations
1420 :     {
1421 :     my($self) = @_;
1422 :    
1423 :     #
1424 :     # First we map the PEGs in this subsystem to PEGs in the
1425 :     # local SEED.
1426 :     #
1427 :     # translate_pegs requires a hash of peg->[aliases] as the first argument,
1428 :     # and a hash of peg->sequence as the second argument.
1429 :     #
1430 :    
1431 :     my $fig = $self->{fig};
1432 :    
1433 :     my %pegs;
1434 :     my %seqs_of;
1435 :    
1436 :     for my $pegent (@{$self->{pegs}})
1437 :     {
1438 :     my($peg, $aliases, $org, $func) = @$pegent;
1439 :     $pegs{$peg} = [$aliases, $org, $func];
1440 :     $seqs_of{$peg} = $self->{peg_seq}->{$peg};
1441 :     }
1442 :    
1443 :     sub show_cb
1444 :     {
1445 :     print "$_[0]<p>\n";
1446 :     }
1447 :    
1448 :     my $cached_translation_file = "$self->{qdir}/peg_translation";
1449 :    
1450 :     my $tran_peg;
1451 :    
1452 : olson 1.50 if ($self->{use_cache} and -f $cached_translation_file and -s $cached_translation_file > 0)
1453 : olson 1.39 {
1454 :     #
1455 :     # Read the cached translations.
1456 :     #
1457 :    
1458 :     if (open(my $fh, "<$cached_translation_file"))
1459 :     {
1460 :     warn "Reading cached peg translations\n";
1461 :     $tran_peg = {};
1462 :     while (<$fh>)
1463 :     {
1464 :     chomp;
1465 :     my($k, $v) = split(/\t/);
1466 :     $tran_peg->{$k} = $v;
1467 :     }
1468 :     close($fh);
1469 :     }
1470 :     }
1471 :    
1472 :     if (!$tran_peg)
1473 :     {
1474 :     $tran_peg = $fig->translate_pegs(\%pegs, \%seqs_of, \&show_cb);
1475 :    
1476 :     #
1477 :     # tran_peg is now a hash from subsystem_peg->local_peg
1478 :     #
1479 :    
1480 :     #
1481 :     # Write the translations out to a file in the queue directory
1482 :     # for use during installation.
1483 :     #
1484 :    
1485 : olson 1.50 if ($self->{use_cache} and open(my $fh, ">$self->{qdir}/peg_translation"))
1486 : olson 1.39 {
1487 :     for my $p (keys(%$tran_peg))
1488 :     {
1489 :     my $tp = $tran_peg->{$p};
1490 :     print $fh "$p\t$tp\n";
1491 :     }
1492 :     close($fh);
1493 :     }
1494 :     }
1495 :     $self->{tran_peg} = $tran_peg;
1496 :     return $tran_peg;
1497 :     }
1498 :    
1499 :     #
1500 : olson 1.33 # Analyze this subsystem for compatibility with this SEED install.
1501 :     #
1502 :     # Returns three lists:
1503 :     #
1504 :     # A major conflict list, consisting of tuples
1505 :     # [$ss_peg, $ss_func, $loc_peg, $loc_func, $subs] where $ss_peg
1506 :     # is the peg in the subsystem being analyzied, and $ss_func is
1507 :     # its assigned function in that subsystem. $loc_peg is the peg
1508 :     # in the local SEED, and $loc_func its local assignment. $subs is
1509 :     # the list of pairs [$subsystem_name, $role] denoting the subsystem(s)
1510 :     # that $loc_peg particpates in.
1511 :     #
1512 :     # A conflict is flagged if the local function is different than
1513 :     # the one being imported, and if the local peg is in a subsystem.
1514 :     #
1515 :     # A minor conflict list, consisting of tuples [$ss_peg, $ss_func, $loc_peg, $loc_func].
1516 :     #
1517 :     #
1518 :     # The second list is a list of subsystem pegs that do not have
1519 :     # a local equivalent. Each entry is a triple
1520 :     # [peg, orgname, function].
1521 :     #
1522 :    
1523 :     sub analyze
1524 :     {
1525 :     my($self) = @_;
1526 :     my $fig = $self->{fig};
1527 :    
1528 : olson 1.39 my $tran_peg = $self->ensure_peg_translations();
1529 :    
1530 :     #
1531 :     # Now we walk the PEGs, determining a) which are missing
1532 :     # in the local SEED, and b) which have a conflicting assignment.
1533 :     #
1534 : olson 1.33 #
1535 : olson 1.39 # We also need to determine if this assignment will cause
1536 :     # pegs to be filled into subsystem roles that were not
1537 :     # otherwise going to be added.
1538 : olson 1.33 #
1539 : olson 1.39 # To enable this, we determine from the subsystem index
1540 :     # the list all roles that are present in subsystems on
1541 :     # this SEED.
1542 : olson 1.33 #
1543 :    
1544 : olson 1.39 my $sub_name = $self->name();
1545 :     my $subsystem_roles = $fig->subsystem_roles();
1546 :    
1547 :     my(@conflict, $minor_conflict, $missing);
1548 :    
1549 : olson 1.40 #
1550 :     # Hashes for accumulating aggregate counts of conflicts.
1551 :     #
1552 :    
1553 :     my(%subs_in, %subs_out, %roles_in, %roles_out);
1554 :    
1555 : olson 1.39 $missing = [];
1556 :    
1557 :     print "Determining conflicts...<p>\n";
1558 : olson 1.33
1559 :     for my $pegent (@{$self->{pegs}})
1560 :     {
1561 : olson 1.39 my($ss_peg, undef, $ss_org, $ss_func) = @$pegent;
1562 :    
1563 :     #
1564 :     # If this peg has a local translation, determine if
1565 :     # the associated assignment conflicts with a local assignment.
1566 :     #
1567 :     # One type of conflict occurs when the new assignment would cause
1568 :     # the peg to be removed from a subsystem. This occurs when the
1569 :     # new functional assignment is different from the current
1570 :     # assignment, and the peg is already in a subsystem.
1571 :     #
1572 :     # Another type of conflict occurs when the new assignment
1573 :     # for a peg matches a role that exists in a locally-installed
1574 :     # subsystem. This will cause the peg to be added to a
1575 :     # subsystem upon refill of that subsystem.
1576 :     #
1577 :     # It is possible for both the above conditions to hold,
1578 :     # in which case a peg would be moved out of one
1579 :     # subsystem into another.
1580 :     #
1581 :     # We denote these cases in the generated conflict list by
1582 :     # annotating the entry with the list of subsystems from which
1583 :     # the peg would be removed if the assignment were to be
1584 :     # accepted, and the list of subsystems to which the
1585 :     # peg would be added.
1586 :     #
1587 :    
1588 :     if (my $loc_peg = $tran_peg->{$ss_peg})
1589 :     {
1590 :     my $subs_removed = [];
1591 :     my $subs_added = [];
1592 :    
1593 :     #
1594 :     # Determine what our locally-assigned function is, and what
1595 :     # subsystem this peg appears in.
1596 :     #
1597 :    
1598 :     my $loc_func = $fig->function_of($loc_peg);
1599 :    
1600 :     #
1601 :     # If the functions don't match, it's a conflict.
1602 :     # If the local function is in a subsystem, it's a major
1603 :     # conflict. If it's not, it's a minor conflict.
1604 :     #
1605 :     # We actually let the major/minor determination be done by
1606 :     # the analysis display code, since the difference is only in whether
1607 :     # there are subsystems.
1608 :     #
1609 :    
1610 :     if ($loc_func ne $ss_func)
1611 :     {
1612 :    
1613 :     #
1614 :     # If the function defined in the new subsystem is different than
1615 :     # the current function, we mark a conflict. Along with the conflict
1616 :     # we include a list of the subsystems in which the local peg
1617 :     # is included.
1618 :     #
1619 :     # We use the subsystems_for_peg method to determine local subsystems
1620 :     # associated with a peg. It returns a list of pairs [subsystem, rolename].
1621 :     #
1622 :    
1623 :     #
1624 :     # What if we are loading a new version of an existing subsystem, and
1625 :     # a role name has changed?
1626 :     #
1627 :     # In this case, $loc_func ne $ss_func, $loc_peg will appear in the local copy of
1628 :     # the subsystem we are loading, and hence as a candidate for removal from that subsystem.
1629 :     # This may be thought of as a spurious message, and leads me to want to remove
1630 :     # such warnings (if I'm updating a subsystem, I can expect that the pegs in that
1631 :     # subsystem will change).
1632 :     #
1633 : olson 1.40 # subsystems_for_peg returns a list of pairs [subsystem, role].
1634 :     #
1635 :     # There might be somethign of a discrepancy here. This only
1636 :     # measures the subsystems the peg is actually currently part of, not
1637 :     # the number of subsystems that have a role corresponding to the peg's
1638 :     # current assignment.
1639 :     #
1640 : olson 1.39
1641 :     my @removed = $fig->subsystems_for_peg($loc_peg);
1642 : olson 1.40
1643 :     for my $r (@removed)
1644 :     {
1645 :     my($rsub, $rrole) = @$r;
1646 :    
1647 :     #
1648 :     # Skip the numbers for an existing subsystem.
1649 :     #
1650 :     next if $rsub eq $sub_name;
1651 :    
1652 :     $roles_out{$rrole}++;
1653 :     $subs_out{$rsub}++;
1654 :    
1655 :     push(@$subs_removed, $r);
1656 :     }
1657 : olson 1.39
1658 :     #
1659 :     # We also check to see if the new function is present
1660 :     # as a role in any local subsystem. If it is, then when that subsystem
1661 :     # is refilled, this peg will appear in it.
1662 :     #
1663 : olson 1.41 # $subsystem_roles is a hash keyed on role name with each value
1664 : olson 1.40 # a list of subsystem names.
1665 : olson 1.39 #
1666 :    
1667 :     if (my $loc_ss = $subsystem_roles->{$ss_func})
1668 :     {
1669 : olson 1.40 #
1670 :     # $loc_ss is the set of subsystems that have the new
1671 :     # function assignment as a role name.
1672 :     #
1673 : olson 1.41 my @subs = grep { $_ ne $sub_name} @$loc_ss;
1674 : olson 1.40
1675 : olson 1.41 if (@subs)
1676 :     {
1677 :     push(@$subs_added, @subs);
1678 :    
1679 :     map { $subs_in{$_}++ } @subs;
1680 :     $roles_in{$ss_func}++;
1681 :     }
1682 : olson 1.39 }
1683 :    
1684 :     push(@conflict, [$ss_peg, $ss_func, $loc_peg, $loc_func, $subs_removed, $subs_added]);
1685 :     }
1686 :    
1687 :     }
1688 :     else
1689 :     {
1690 :     push(@$missing, [$ss_peg, $ss_org, $ss_func]);
1691 :     }
1692 :     }
1693 :    
1694 :     my $conflict = [sort { @{$b->[4]} + @{$b->[5]} <=> @{$a->[4]} + @{$a->[5]} } @conflict];
1695 :    
1696 : olson 1.40 my $aggreg = {
1697 :     roles_in => [keys(%roles_in)],
1698 :     roles_out => [keys(%roles_out)],
1699 :     subs_in => [keys(%subs_in)],
1700 :     subs_out => [keys(%subs_out)],
1701 :     };
1702 :    
1703 :     return ($conflict, $missing, $aggreg);
1704 : olson 1.39 }
1705 :    
1706 :     sub read_cached_analysis
1707 :     {
1708 :     my($self) = @_;
1709 :    
1710 :     my $cfile = "$self->{qdir}/conflicts";
1711 :     my $mfile = "$self->{qdir}/missing";
1712 :    
1713 :     my($conflict, $missing);
1714 :     $conflict = [];
1715 :     $missing = [];
1716 :    
1717 :     if (open(my $fh, "<$cfile"))
1718 :     {
1719 :    
1720 :     while (<$fh>)
1721 :     {
1722 :     chomp;
1723 :    
1724 :     my($ss_peg, $ss_func, $loc_peg, $loc_func) = split(/\t/);
1725 :    
1726 :     my $subs_removed = <$fh>;
1727 :     my $subs_added = <$fh>;
1728 :    
1729 :     chomp($subs_removed);
1730 :     chomp($subs_added);
1731 :    
1732 :     my @subs_removed_raw = split(/\t/, $subs_removed);
1733 :     my $subs_added_list = [split(/\t/, $subs_added)];
1734 :    
1735 :     my $subs_removed_list = [];
1736 :    
1737 :     while (@subs_removed_raw)
1738 :     {
1739 :     my($v1, $v2, @rest) = @subs_removed_raw;
1740 :     @subs_removed_raw = @rest;
1741 :     push(@$subs_removed_list, [$v1, $v2]);
1742 :     }
1743 :    
1744 :     push(@$conflict, [$ss_peg, $ss_func, $loc_peg, $loc_func, $subs_removed_list, $subs_added_list]);
1745 :     }
1746 :     }
1747 :    
1748 :     if (open(my $fh, "<$mfile"))
1749 :     {
1750 :    
1751 :     while (<$fh>)
1752 :     {
1753 :     chomp;
1754 :    
1755 :     my(@a) = split(/\t/);
1756 :     push(@$missing, [@a]);
1757 :     }
1758 :     }
1759 :    
1760 :     return($conflict, $missing);
1761 :     }
1762 :    
1763 :     #
1764 :     # Install this subsystem.
1765 :     #
1766 : olson 1.42 # $dont_assign is a list of subsytem PEGs that should not have their assignments overwritten.
1767 : olson 1.39 #
1768 :     # We return a list of for-the-installer messages that will be presented when the install completes.
1769 :     #
1770 : olson 1.50 # If $assignments_file is set, assignments will be written to that file
1771 :     # instead of being installed.
1772 :     #
1773 : olson 1.39 sub install
1774 :     {
1775 : olson 1.50 my($self, $dont_assign, $imported_from, $assignments_file) = @_;
1776 : olson 1.39
1777 :     my @messages;
1778 :    
1779 :     my $fig = $self->{fig};
1780 :     my $subsystems_dir = "$FIG_Config::data/Subsystems";
1781 :    
1782 :     my $sub_name = $self->name();
1783 :     $sub_name =~ s/ /_/g;
1784 :     my $sub_dir = "$subsystems_dir/$sub_name";
1785 :     my $ver = $self->version();
1786 :    
1787 :     #
1788 :     # First check to see if we already have this subsystem installed.
1789 :     #
1790 :    
1791 :     if (-d $sub_dir and (my $cur_ver = $fig->subsystem_version($sub_name)) >= $ver)
1792 :     {
1793 :     warn "Not importing $sub_name: current version $cur_ver >= imported version $ver";
1794 : olson 1.50 push(@messages, "Not importing $sub_name: current version $cur_ver >= imported version $ver\n");
1795 :     return @messages;
1796 : olson 1.39 }
1797 :    
1798 :     warn "Importing $sub_name version $ver\n";
1799 :     push(@messages, "Importing $sub_name version $ver\n");
1800 :    
1801 :     if (! -d $sub_dir)
1802 :     {
1803 :     mkdir($sub_dir, 0777) or die "Cannot mkdir $sub_dir: $!";
1804 : olson 1.33 }
1805 :    
1806 : olson 1.39 #
1807 :     # Write the header/meta information.
1808 :     #
1809 :    
1810 :     my $fh;
1811 : olson 1.50 $imported_from = "???" unless $imported_from ne '';
1812 : olson 1.39
1813 :     open($fh, ">$sub_dir/VERSION") or die "Cannot open $sub_dir/VERSION for writing: $!";
1814 :     print $fh "$ver\n";
1815 :     close($fh);
1816 :     chmod(0666, "$sub_dir/VERSION");
1817 :    
1818 :     open($fh, ">$sub_dir/EXCHANGABLE") or die "Cannot open $sub_dir/EXCHANGABLE for writing: $!";
1819 :     print $fh $self->exchangable() . "\n";
1820 :     close($fh);
1821 :     chmod(0666, "$sub_dir/EXCHANGABLE");
1822 :    
1823 :     open($fh, ">$sub_dir/curation.log") or die "Cannot open $sub_dir/curation.logt for writing: $!";
1824 :     print $fh "$self->{curation_log}\n";
1825 :     my $time = time;
1826 :     print $fh "$time\t$imported_from\timported_from\n";
1827 :     close($fh);
1828 :     chmod(0666, "$sub_dir/curation.log");
1829 :    
1830 :     open($fh, ">$sub_dir/notes") or die "Cannot open $sub_dir/notes for writing: $!";
1831 :     print $fh $self->{notes_txt} . "\n";
1832 :     close($fh);
1833 :     chmod(0666, "$sub_dir/notes");
1834 : olson 1.49
1835 :     if ($self->{reactions})
1836 :     {
1837 :     open($fh, ">$sub_dir/reactions") or die "Cannot open $sub_dir/reactions for writing: $!";
1838 :     print $fh $self->{reactions} . "\n";
1839 :     close($fh);
1840 :     chmod(0666, "$sub_dir/reactions");
1841 :     }
1842 : olson 1.39
1843 :     my $tran_peg = $self->ensure_peg_translations();
1844 : olson 1.33
1845 :     #
1846 : olson 1.39 # We can start writing the spreadsheet.
1847 : olson 1.33 #
1848 :    
1849 : olson 1.39 my $ssa_fh;
1850 :     open($ssa_fh, ">$sub_dir/spreadsheet") or die "Cannot open $sub_dir/spreadsheet for writing: $!";
1851 :    
1852 : olson 1.33 #
1853 : olson 1.39 # Start with the roles and subsets.
1854 : olson 1.33 #
1855 : olson 1.39
1856 :     print $ssa_fh $self->{role_text};
1857 :     print $ssa_fh "//\n";
1858 :    
1859 :     print $ssa_fh $self->{subsets_text};
1860 :     print $ssa_fh "//\n";
1861 : olson 1.33
1862 : olson 1.39 for my $g (@{$self->{genomes}})
1863 : olson 1.33 {
1864 : olson 1.39 my $gobj = $self->{genome_objs}->{$g};
1865 :     my ($trans_genome, @row) = $gobj->translate($tran_peg);
1866 :    
1867 :     if ($trans_genome)
1868 : olson 1.33 {
1869 : olson 1.39 print $ssa_fh join("\t", $trans_genome, $gobj->{variant}, @row), "\n";
1870 : olson 1.33 }
1871 :     }
1872 : olson 1.39
1873 :     close($ssa_fh);
1874 :    
1875 :     #
1876 : olson 1.50 # The subsystem itself is now in place. Depending on how we were
1877 :     # invoked, write the assignments to a file, or install them on
1878 :     # the system.
1879 :     #
1880 : olson 1.58 # If dont_assign is not a list but is true, save no assignments at all.
1881 :     #
1882 :     if (!ref($dont_assign) and $dont_assign)
1883 :     {
1884 :     # Skip assignments
1885 :     }
1886 :     elsif (defined($assignments_file))
1887 : olson 1.50 {
1888 :     $self->write_assignments_to_file(\@messages, $tran_peg, $assignments_file);
1889 :     }
1890 :     else
1891 :     {
1892 :     $self->install_assignments(\@messages, $tran_peg, $dont_assign);
1893 :     }
1894 :    
1895 :     $self->install_blocks(\@messages, $sub_dir);
1896 :    
1897 :     return @messages;
1898 :     }
1899 :    
1900 :     #
1901 :     # Install any other block-data code that's in the package. Right now this is just
1902 :     # the diagrams.
1903 :     #
1904 :     sub install_blocks
1905 :     {
1906 :     my($self, $messages, $sub_dir) = @_;
1907 :     my $fig = $self->{fig};
1908 :    
1909 :     #
1910 :     # At this point, the rest of the subsystem is written to disk. We can
1911 :     # use the Subsys.pm mechanism to write this stuff out.
1912 :     #
1913 :    
1914 :     my $sub = $fig->get_subsystem($self->name());
1915 :    
1916 :     for my $block (@{$self->{blocks}})
1917 :     {
1918 :     my($block_hdr, $block_data) = @$block;
1919 :    
1920 :     if ($block_hdr =~ /^diagram:([^:]+):name\t(\S+)/)
1921 :     {
1922 :     #
1923 :     # The diagram output format ensures this is the first block, so just
1924 :     # create the diagram.
1925 :     #
1926 :    
1927 :     my $diagram_id = $1;
1928 :     my $diagram_name = $2;
1929 :    
1930 : olson 1.60 $sub->create_new_diagram(undef, undef, $diagram_name, $diagram_id, 1);
1931 : olson 1.50 }
1932 :     elsif ($block_hdr =~ m,^diagram:([^:]+):diagram=([^\s/]+)\t(\d+),)
1933 :     {
1934 :     my $diagram_id = $1;
1935 :     my $img_file = $2;
1936 :     my $size = $3;
1937 :    
1938 :     my $ddir = "$sub_dir/diagrams/$diagram_id";
1939 :    
1940 :     if (! -d $ddir)
1941 :     {
1942 :     push(@$messages, "Invalid diagrams: diagram directory for $diagram_id did not exist while parsing diagram file\n");
1943 :     next;
1944 :     }
1945 :    
1946 :     if (!open(FH, ">$ddir/$img_file"))
1947 :     {
1948 :     push(@$messages, "Cannot open image file $ddir/$img_file for writing: $!\n");
1949 :     next;
1950 :     }
1951 :    
1952 :     for my $line (@$block_data)
1953 :     {
1954 :     $line =~ s/^B://;
1955 :     my $dec = decode_base64($line);
1956 :     print FH $dec;
1957 :     }
1958 :     close(FH);
1959 :    
1960 :     my $fsize = -s "$ddir/$img_file";
1961 :     if ($fsize != $size)
1962 :     {
1963 :     push(@$messages, "Diagram image $img_file size $fsize does not match declared size $size\n");
1964 :     warn "Diagram image $img_file size $fsize does not match declared size $size";
1965 :     }
1966 :     }
1967 :     }
1968 :     }
1969 :    
1970 :    
1971 :    
1972 :     sub write_assignments_to_file
1973 :     {
1974 :     my($self, $messages, $tran_peg, $filename) = @_;
1975 :     my $fig = $self->{fig};
1976 :    
1977 :     my $fh;
1978 :    
1979 :     for my $pegent (@{$self->{pegs}})
1980 :     {
1981 :     my($peg, $aliases, $org, $func) = @$pegent;
1982 :     my $tpeg = $tran_peg->{$peg};
1983 :    
1984 :     if (!$tpeg)
1985 :     {
1986 :     warn "Couldn't translate $peg (from $org)\n";
1987 :     push(@$messages, "Couldn't translate $peg (from $org)");
1988 :     next;
1989 :     }
1990 :    
1991 :     my $old = $fig->function_of($tpeg);
1992 :    
1993 :     if ($old ne $func)
1994 :     {
1995 :     #
1996 :     # Only open the file if we have assignments to write.
1997 :     #
1998 :    
1999 :     if (!defined($fh))
2000 :     {
2001 :     open($fh, ">$filename") or confess "Error opening $filename for writing: $!";
2002 :     }
2003 :     print $fh "$tpeg\t$func\n";
2004 :     }
2005 :     }
2006 :     if (defined($fh))
2007 :     {
2008 :     close($fh);
2009 :     }
2010 :     }
2011 :    
2012 :    
2013 :     sub install_assignments
2014 :     {
2015 :     my($self, $messages, $tran_peg, $dont_assign) = @_;
2016 :     my $fig = $self->{fig};
2017 :    
2018 : olson 1.33 #
2019 : olson 1.39 # Enter the new assignments, saving the old assignments in the spool dir.
2020 : olson 1.33 #
2021 :    
2022 : olson 1.39 my $now = time;
2023 :    
2024 :     my $old_funcs_fh;
2025 :     open($old_funcs_fh, ">>$self->{qdir}/old_assignments.$now");
2026 :    
2027 :     my $curator = $self->curator();
2028 : olson 1.33
2029 : olson 1.42 my %dont_assign;
2030 :    
2031 :     map { $dont_assign{$_}++ } @$dont_assign;
2032 :    
2033 : olson 1.33 for my $pegent (@{$self->{pegs}})
2034 :     {
2035 : olson 1.39 my($peg, $aliases, $org, $func) = @$pegent;
2036 :     my $tpeg = $tran_peg->{$peg};
2037 :    
2038 :     if (!$tpeg)
2039 :     {
2040 :     warn "Couldn't translate $peg (from $org)\n";
2041 : olson 1.50 push(@$messages, "Couldn't translate $peg (from $org)");
2042 : olson 1.39 next;
2043 :     }
2044 :    
2045 : olson 1.42 if ($dont_assign{$peg})
2046 :     {
2047 :     warn "Skipping assignment of $peg ($tpeg locally)\n";
2048 :     next;
2049 :     }
2050 :    
2051 : olson 1.39 my $old = $fig->function_of($tpeg);
2052 :    
2053 :     if ($old ne $func)
2054 : olson 1.33 {
2055 : olson 1.39 print $old_funcs_fh "$tpeg\t$old\t$curator\t$func\n";
2056 :     $fig->add_annotation($tpeg, $curator,
2057 :     "Assigning function $func based on installation of subsystem $self->{name}");
2058 : olson 1.33
2059 : golsen 1.62 # Everyone is now master, and assign_function adds annotation
2060 :     #
2061 :     # if ($curator =~ /master:(.*)/)
2062 :     # {
2063 :     # my $user = $1;
2064 :     # $fig->assign_function($tpeg, "master", $func, "");
2065 :     # $fig->add_annotation($tpeg, $user, "Set master function to\n$func\n");
2066 :     # }
2067 :     # else
2068 :     # {
2069 :    
2070 :     $fig->assign_function($tpeg, $curator, $func, "");
2071 :    
2072 :     # $fig->add_annotation($tpeg, $curator, "Set function to\n$func\n");
2073 :     # }
2074 : olson 1.33 }
2075 :     else
2076 :     {
2077 : olson 1.39 # print "$tpeg already has assignment $func\n";
2078 : olson 1.33 }
2079 :     }
2080 : olson 1.39 close($old_funcs_fh);
2081 : olson 1.33 }
2082 :    
2083 : olson 1.41 #
2084 :     # Read the aggregate analysis results.
2085 :     #
2086 :    
2087 :     sub aggregate_analysis
2088 :     {
2089 :     my($self) = @_;
2090 :    
2091 :     if (open(my $fh, "<$self->{qdir}/aggregate"))
2092 :     {
2093 :     local($/);
2094 :     my $txt = <$fh>;
2095 :     close($fh);
2096 :    
2097 :     my $VAR1; # For the Dumper'd data.
2098 :    
2099 :     my $compartment = new Safe;
2100 :     my $aggr = $compartment->reval($txt);
2101 :     if ($@)
2102 :     {
2103 :     warn "aggregate_analysis: error parsing saved data: $@";
2104 :     return undef;
2105 :     }
2106 :     return $aggr;
2107 :     }
2108 :     else
2109 :     {
2110 :     return undef;
2111 :     }
2112 :     }
2113 :    
2114 : olson 1.33 sub name
2115 :     {
2116 :     my($self) = @_;
2117 :     return $self->{name};
2118 :     }
2119 :    
2120 :    
2121 :     sub version
2122 :     {
2123 :     my($self) = @_;
2124 :     return $self->{version};
2125 :     }
2126 :    
2127 :     sub exchangable
2128 :     {
2129 :     my($self) = @_;
2130 :     return $self->{exchangable};
2131 :     }
2132 :    
2133 :     sub curator
2134 :     {
2135 :     my($self) = @_;
2136 :     return $self->{curator};
2137 :     }
2138 :    
2139 : olson 1.39 sub analysis_complete
2140 :     {
2141 :     my($self) = @_;
2142 :    
2143 :     return -f "$self->{qdir}/analysis_complete";
2144 :     }
2145 :    
2146 :     sub analysis_jobid
2147 :     {
2148 :     my($self) = @_;
2149 :    
2150 :     my $jid_file = "$self->{qdir}/analysis_jobid";
2151 :    
2152 :     return &FIG::file_head($jid_file, 1);
2153 :     }
2154 :    
2155 : olson 1.33 package GenomeObj;
2156 :    
2157 : olson 1.39 use strict;
2158 :     use Data::Dumper;
2159 :    
2160 :     #
2161 :     # A genomeobj is a small datatype that holds the data in a row of a
2162 :     # spreadsheet file.
2163 :     #
2164 :    
2165 : olson 1.33 sub new
2166 :     {
2167 :     my($class, $subfile, $fig, $genome, $variant, $items) = @_;
2168 :    
2169 :     my $self = {
2170 :     fig => $fig,
2171 :     subfile => $subfile,
2172 :     genome => $genome,
2173 :     variant => $variant,
2174 :     items => $items,
2175 :     };
2176 :     return bless($self, $class);
2177 :    
2178 :     }
2179 :    
2180 : olson 1.39 #
2181 :     # Translate this row to a new context.
2182 :     #
2183 :     # $trans_peg is a hash mapping from spreadsheet PEG to local PEG
2184 :     #
2185 :     sub translate
2186 :     {
2187 :     my($self, $trans_peg) = @_;
2188 :     my $fig = $self->{fig};
2189 :    
2190 :     my $genome = $self->{genome};
2191 :    
2192 :     my $parsed_items = [];
2193 :     $self->{parsed_items} = $parsed_items;
2194 :     my $trans_items = [];
2195 :     $self->{trans_items} = $trans_items;
2196 :    
2197 :     #
2198 :     # Hash of genomes seen in this row.
2199 :     my %genomes;
2200 :    
2201 :     for my $item (@{$self->{items}})
2202 :     {
2203 : olson 1.59 my $l = [];
2204 :     for my $fid (split(/,/, $item))
2205 :     {
2206 :     if ($fid =~ /^\d+$/)
2207 :     {
2208 :     push(@$l, "fig|$genome.peg.$fid");
2209 :     }
2210 :     else
2211 :     {
2212 :     push(@$l, "fig|$genome.$fid");
2213 :     }
2214 :     }
2215 :    
2216 : olson 1.39 my $t = [ map { $trans_peg->{$_} } @$l ];
2217 :    
2218 :     push(@$parsed_items, $l);
2219 :     push(@$trans_items, $t);
2220 :    
2221 :     #
2222 :     # Count the genomes that are seen in the translated pegs.
2223 :     #
2224 :    
2225 :     for my $tpeg (@$t)
2226 :     {
2227 :     my $tg = $fig->genome_of($tpeg);
2228 :     $genomes{$tg}++ if $tg ne "";
2229 :     }
2230 :    
2231 :     }
2232 :    
2233 :     #
2234 :     # Now determine the dominant organism for this translated row.
2235 :     #
2236 :    
2237 :     my @orgs = sort { $genomes{$b} <=> $genomes{$a} } keys(%genomes);
2238 :    
2239 :     # print "@{$self->{items}}\n";
2240 :     # print join(" ", map { "$_: $genomes{$_} " } @orgs ), "\n";
2241 :    
2242 :     unless (@orgs == 1 # Single organism
2243 :     or
2244 :     (@orgs > 1 and $genomes{$orgs[0]} > (2 * $genomes{$orgs[1]})) # First org has > 2x the second org
2245 :     )
2246 :     {
2247 :     warn "Could not determine translation for $genome\n";
2248 :     return undef;
2249 :     }
2250 :    
2251 :     #
2252 :     # The dominant organism is the first in the list.
2253 :     #
2254 :    
2255 :     my $dom = $orgs[0];
2256 :    
2257 :     #
2258 :     # Run through the translated pegs, and remove the ones that are
2259 :     # not in the dominant organism.
2260 :     #
2261 :    
2262 :     my @res;
2263 :     for my $i (0..@$trans_items - 1)
2264 :     {
2265 :     my $t = $trans_items->[$i];
2266 :    
2267 :     my @nt;
2268 :     for my $peg (@$t)
2269 :     {
2270 :     if ($peg =~ /^fig\|(\d+\.\d+)\.peg\.(\d+)$/)
2271 :     {
2272 :     if ($1 eq $dom)
2273 :     {
2274 :     push(@nt, $2);
2275 :     }
2276 :     }
2277 : olson 1.59 elsif ($peg =~ /^fig\|(\d+\.\d+)\.([^.]+\.\d+)$/)
2278 :     {
2279 :     if ($1 eq $dom)
2280 :     {
2281 :     push(@nt, $2);
2282 :     }
2283 :     }
2284 : olson 1.39 }
2285 :     push(@res, join(",", @nt));
2286 :     }
2287 :     return $dom, @res;
2288 :     }
2289 : efrank 1.1 1

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3