[Bio] / FigKernelPackages / P2P.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/P2P.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.31 - (view) (download) (as text)

1 : olson 1.1 #
2 : olson 1.31 # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 :     #
19 : olson 1.1 # This module contains the code for the P2P update protocol.
20 :     #
21 :     # Package P2P contains the namespace declarations, and possibly toplevel utility
22 :     # routines. (get_relay ?)
23 :     #
24 :     # Package P2P::Relay contains methods for contacting the P2P relay service. The actual
25 :     # implementation of the relay service is not contained here - it is a standalone module
26 :     # that can be installed on a web server that does not have a full SEED installed.
27 :     #
28 :     # Package P2P::Requestor contains the requestor-side code for the update protocol.
29 :     #
30 :     # Package P2P::Service contains the web service implementation routines for the
31 :     # protocol.
32 :     #
33 :    
34 :     package P2P;
35 :    
36 :     use FIG_Config;
37 :    
38 : olson 1.24 use AnyDBM_File;
39 : olson 1.19 use Fcntl;
40 :    
41 : olson 1.1 use strict;
42 :     use Exporter;
43 :     use base qw(Exporter);
44 :    
45 : olson 1.15 use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
46 :    
47 : olson 1.1 use Data::Dumper;
48 :    
49 :     use vars qw(@EXPORT @EXPORT_OK);
50 :     @EXPORT = ();
51 :     @EXPORT_OK = qw($ns_p2p $ns_relay);
52 :    
53 :     our $ns_p2p = "http://thefig.info/schemas/p2p_update";
54 :     our $ns_relay = "http://thefig.info/schemas/p2p_relay";
55 :    
56 : olson 1.21 my $peg_batch_size = 1000;
57 :     my $anno_batch_size = 1000;
58 :     my $assign_batch_size = 1000;
59 : olson 1.23 my $fin_batch_size = 1000;
60 : olson 1.21
61 : olson 1.22 my $log_fh;
62 : olson 1.23 my $html_fh;
63 : olson 1.22
64 : olson 1.1 =pod
65 :    
66 : olson 1.25 =head1 perform_update($peer, $last_update, $skip_tough_search, $update_thru, $log_file, $html_file, $assignment_policy))
67 : olson 1.1
68 :     Perform a peer-to-peer update with the given peer. $peer is an instance of
69 :     P2P::Requestor which can connect to the peer. It is expected that the
70 :     SEED infrastructure will create this requestor appropriately for the
71 :     particular circumstance (direct connection, thru relay, etc).
72 :    
73 :     This code executes the high-level protocol, maintaining state between
74 :     calls to the peer to exchange the actual information.
75 :    
76 : olson 1.25 $last_update: Search for updates since this time.
77 :     $skip_tough_search: Do not use the time-consuming $fig->tough_search method as a last resort for peg mapping.
78 :     $update_thru: Search for updates until this time. Undef means to search for all since $last_update.
79 :     $log_file: Write logging information to this file.
80 :     $html_file: Write a HTML summary to this file.
81 :     $assignment_policy: If a list reference, contains the list of users from which we will accept assignments. If a code ref, a predicate that is passed ($peg, $timestamp, $author, $function) and returns true if the assignment should be made.
82 :    
83 : olson 1.1 =cut
84 :    
85 :     sub perform_update
86 :     {
87 : olson 1.25 my($fig, $peer, $last_update, $skip_tough_search, $update_thru, $log_file, $html_file,
88 :     $assignment_policy) = @_;
89 :    
90 :     my $allow_assignment;
91 : olson 1.22
92 :     $log_file = "/dev/null" unless $log_file ne "";
93 :     open($log_fh, ">>$log_file") or die "Cannot open logfile $log_file: $!\n";
94 :     $log_fh->autoflush(1);
95 :    
96 : olson 1.23 $html_file = "/dev/null" unless $html_file ne "";
97 :     open($html_fh, ">$html_file") or die "Cannot open htmlfile $html_file: $!\n";
98 :     $html_fh->autoflush(1);
99 :    
100 : olson 1.27 if (!defined($assignment_policy))
101 :     {
102 :     $allow_assignment = sub { 1;};
103 :     }
104 :     elsif (ref($assignment_policy) eq "CODE")
105 : olson 1.25 {
106 :     $allow_assignment = $assignment_policy;
107 :     }
108 :     elsif (ref($assignment_policy) eq "ARRAY")
109 :     {
110 :     my $ahash = {};
111 :     map { $ahash->{$_}++; } @$assignment_policy;
112 :     $allow_assignment = sub {
113 :     return $ahash->{$_[2]};
114 :     };
115 :     }
116 :     elsif (ref($assignment_policy) eq "HASH")
117 :     {
118 :     $allow_assignment = sub {
119 :     return $assignment_policy->{$_[2]};
120 :     };
121 :     }
122 :     else
123 :     {
124 :     print $log_fh "Invalid assignment policy $assignment_policy\n";
125 :     die "Invalid assignment policy $assignment_policy\n";
126 :     }
127 :    
128 : olson 1.23 my $now = localtime();
129 :     my $last_str = localtime($last_update);
130 :     print $html_fh <<END;
131 :     <h1>P2P Update at $now</h1>
132 :     Peer URL $peer->{url}<br>
133 :     Update from: $last_str<br>
134 :     END
135 :    
136 :     print $log_fh "Beginning P2P update at $now\n";
137 : olson 1.22 print $log_fh " Peer URL: $peer->{url}\n";
138 : olson 1.23 print $log_fh " Update from: $last_str\n";
139 : olson 1.22 print $log_fh "\n";
140 : olson 1.1
141 : olson 1.20 my $ret = $peer->request_update($last_update, $update_thru);
142 : olson 1.1
143 :     if (!$ret or ref($ret) ne "ARRAY")
144 :     {
145 : olson 1.18 die "perform_update: request_update failed\n";
146 : olson 1.1 }
147 :    
148 : olson 1.15 my($session, $target_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,
149 : olson 1.1 $target_time, $compatible) = @$ret;
150 :    
151 : olson 1.18 print "perform_update: session=$session target=@$target_release num_annos=$num_annos\n";
152 : olson 1.1 print " num_pegs=$num_pegs num_genomes=$num_genomes target_time=$target_time compat=$compatible\n";
153 :    
154 : olson 1.22 my @my_release = $fig->get_release_info();
155 :    
156 :     print $log_fh "Session id = $session\n";
157 :     print $log_fh "Target release information: \n\t", join("\n\t", @$target_release), "\n";
158 :     print $log_fh "My release information: \n\t", join("\n\t", @my_release), "\n";
159 :     print $log_fh "$num_annos annotations\n";
160 :     print $log_fh "$num_assignments assignments\n";
161 :     print $log_fh "$num_pegs pegs\n";
162 :    
163 : olson 1.23 print $html_fh "Session id = $session<br>\n";
164 :     print $html_fh "Target release information: <br>\n\t", join("<br>\n\t", @$target_release), "<br>\n";
165 :     print $html_fh "My release information: <br>\n\t", join("<br>\n\t", @my_release), "<br>\n";
166 :     print $html_fh "$num_annos annotations<br>\n";
167 :     print $html_fh "$num_assignments assignments<br>\n";
168 :     print $html_fh "$num_pegs pegs<br>\n";
169 :    
170 : olson 1.1 #
171 : olson 1.19 # We now know the data release for our peer.
172 :     #
173 : olson 1.24 # Open up the peg translation cache database (a AnyDBM_File) tied
174 : olson 1.19 # to %peg_cache. We needn't worry about keeping it in a directory
175 :     # based on our current release, as it the cache directory is kept *in*
176 :     # the current data release directory.
177 :     #
178 :    
179 :     my $cache_handle;
180 :     my %peg_cache;
181 :     if ($target_release->[1] ne "")
182 :     {
183 :     my $cache_file = "pegcache.$target_release->[1].db";
184 : olson 1.30 my $cache_dir = "$FIG_Config::fig/var/P2PQueue";
185 :     &FIG::verify_dir("$FIG_Config::fig/var");
186 : olson 1.19 $fig->verify_dir($cache_dir);
187 :    
188 : olson 1.24 $cache_handle = tie(%peg_cache, "AnyDBM_File", "$cache_dir/$cache_file",
189 :     O_CREAT | O_RDWR, 0666);
190 : olson 1.19 $cache_handle or warn "Could not tie peg_cache to $cache_dir/$cache_file: $!\n";
191 :     }
192 :    
193 :     #
194 : olson 1.21 # peg_mapping is the local mapping from remote->local peg. This might
195 :     # be replacable by peg_cache from above.
196 : olson 1.1 #
197 : olson 1.21 my %peg_mapping;
198 : olson 1.1
199 : olson 1.21
200 : olson 1.1 #
201 : olson 1.21 # We have the information now to begin the update process. Retrieve the pegs.
202 : olson 1.1 #
203 :    
204 : olson 1.21 _compute_peg_mapping($fig, $peer, $session, $num_pegs, \%peg_mapping, \%peg_cache, $cache_handle,
205 :     $skip_tough_search);
206 : olson 1.7
207 : olson 1.24 eval { $cache_handle->sync();};
208 : olson 1.19 untie %peg_cache;
209 :    
210 :     #
211 : olson 1.15 # Create a list of locally-mapped annotations on a per-genome
212 :     # basis.
213 :     #
214 :    
215 :     my %genome_annos;
216 : olson 1.19
217 :     #
218 :     # %genome_assignments is a hash mapping from genome to a hashref
219 :     # that maps peg to function (since assignments are unique).
220 :     #
221 :     # (Hm. Unless two remote pegs map to the same local peg; unclear what to do
222 :     # then. Punt for now).
223 :     #
224 :     my %genome_assignments;
225 : olson 1.15
226 : olson 1.21 #
227 :     # Retrieve the annotations, and generate a list of mapped annotations.
228 :     #
229 :    
230 :     for (my $anno_start = 0; $anno_start < $num_annos; $anno_start += $anno_batch_size)
231 : olson 1.15 {
232 : olson 1.21 my $anno_req_len = $num_annos - $anno_start;
233 :     $anno_req_len = $anno_batch_size if $anno_req_len > $anno_batch_size;
234 : olson 1.15
235 : olson 1.21 print "Retrieve $anno_req_len annos at $anno_start\n";
236 : olson 1.22 print $log_fh "Retrieve $anno_req_len annos at $anno_start\n";
237 : olson 1.21
238 :     my $annos = $peer->get_annotations($session, $anno_start, $anno_req_len);
239 : olson 1.15
240 : olson 1.21 for my $anno (@$annos)
241 :     {
242 :     my($his_id, $ts, $author, $anno) = @$anno;
243 :    
244 :     my $my_id = $peg_mapping{$his_id};
245 :     next unless $my_id;
246 : olson 1.15
247 : olson 1.21 my $genome = $fig->genome_of($my_id);
248 :    
249 :     push(@{$genome_annos{$genome}}, [$my_id, $ts, $author, $anno]);
250 :     }
251 : olson 1.15 }
252 :    
253 : olson 1.19 #
254 :     # Do the same for the assignments
255 :     #
256 :    
257 : olson 1.20 # print Dumper($assignments);
258 :    
259 : olson 1.21
260 :     for (my $assign_start = 0; $assign_start < $num_assignments; $assign_start += $assign_batch_size)
261 : olson 1.19 {
262 : olson 1.21 my $assign_req_len = $num_assignments - $assign_start;
263 :     $assign_req_len = $assign_batch_size if $assign_req_len > $assign_batch_size;
264 : olson 1.19
265 : olson 1.21 print "Retrieve $assign_req_len assigns at $assign_start\n";
266 : olson 1.22 print $log_fh "Retrieve $assign_req_len assigns at $assign_start\n";
267 : olson 1.21
268 :     my $assignments = $peer->get_assignments($session, $assign_start, $assign_req_len);
269 :    
270 :     for my $assign (@$assignments)
271 :     {
272 :     my($his_id, $ts, $author, $func) = @$assign;
273 : olson 1.19
274 : olson 1.21 my $my_id = $peg_mapping{$his_id};
275 :     next unless $my_id;
276 : olson 1.19
277 : olson 1.21 my $genome = $fig->genome_of($my_id);
278 : olson 1.19
279 : olson 1.21 $genome_assignments{$genome}->{$my_id} = [$my_id, $ts, $author, $func];
280 :     }
281 : olson 1.19 }
282 :    
283 :     # print Dumper(\%genome_annos);
284 : olson 1.15
285 :     #
286 :     # Now install annotations.
287 :     #
288 :    
289 :     for my $genome (keys(%genome_annos))
290 :     {
291 : olson 1.19 #
292 :     # Plan: Apply the merge_annotations.pl logic. Read the annotations
293 :     # from the per-org annotations file, add the new ones here, sort, and remove duplicates.
294 :     # Write the results to the annotations file.
295 :     #
296 :     # When we are all done, rerun the index_annotations script.
297 :     #
298 :     # Why not do that incrementally? Partly because the annotation_seeks table doesn't
299 :     # have a column for the genome id, so a removal of old data would require a
300 :     # string-match query; since a complete reindex of the annotations is pretty
301 :     # fast (60 sec on a G4 laptop on a firewire disk), it's not clear whether the incremental
302 :     # update would actually be a win.
303 :     #
304 :    
305 :     my @annos = @{$genome_annos{$genome}};
306 :     my $assignments = $genome_assignments{$genome};
307 :     #
308 :     # %assignment_annos is a hash from peg to the list
309 :     # of annotations for that peg.
310 :     #
311 :     my %assignment_annos;
312 :    
313 :     my $dir = "$FIG_Config::organisms/$genome";
314 :     my $anno_file = "$dir/annotations";
315 :     my $anno_bak = "$dir/annotations." . time;
316 :    
317 :     my $new_count = @annos;
318 :    
319 :     #
320 :     # Rename the annotations file to a new name based on the current time.
321 :     #
322 :    
323 : olson 1.23 my $gs = $fig->genus_species($genome);
324 :     print $html_fh "<h1>Updates for $genome ($gs)</h1>\n";
325 :    
326 : olson 1.19 if (-f $anno_file)
327 :     {
328 :     rename($anno_file, $anno_bak) or die "Cannot rename $anno_file to $anno_bak: $!";
329 : olson 1.22 print $log_fh "Moved annotations file $anno_file to backup $anno_bak\n";
330 : olson 1.19 }
331 :    
332 :     if (open(my $fh, "<$anno_bak"))
333 :     {
334 :     #
335 :     # While we are scanning here, we look for the latest local assignment
336 :     # for any peg for which we are installing an assignment.
337 :     #
338 :     local($/) = "\n//\n";
339 :    
340 :     my($chunk, $peg, $ts, $author, $anno);
341 :    
342 :     while (defined($chunk = <$fh>))
343 :     {
344 :     chomp $chunk;
345 :     ($peg, $ts, $author, $anno) = split(/\n/, $chunk, 4);
346 :    
347 :     if ($peg =~ /^fig\|/ and $ts =~ /^\d+$/)
348 :     {
349 : olson 1.26 #
350 : olson 1.28 # The last field marks this as an "old" annotation (that is,
351 :     # already in place in this system), so we don't
352 : olson 1.26 # log its installation later.
353 :     #
354 :     my $ent = [$peg, $ts, $author, $anno, 1];
355 : olson 1.19 push(@annos, $ent);
356 :    
357 :     if (defined($assignments->{$peg}))
358 :     {
359 :     #
360 :     # We have an incoming assignment for this peg.
361 :     # Don't parse anything yet, but push the annotation
362 :     # on a list so we can sort by date.
363 :     #
364 :     push(@{$assignment_annos{$peg}}, $ent);
365 :     }
366 :     }
367 :     }
368 :     close($fh);
369 :     }
370 :    
371 :     #
372 :     # Determine if we are going to install an assignment.
373 :     #
374 :    
375 : olson 1.23 my $cgi_url = &FIG::cgi_url();
376 :     print $html_fh "<h2>Assignments made</h2>\n";
377 : olson 1.25 print $html_fh "<table border=\"1\">\n";
378 : olson 1.23 print $html_fh "<tr><th>PEG</th><th>Old assignment</th><th>New assignment</th><tr>\n";
379 :    
380 : olson 1.19 for my $peg (keys %$assignments)
381 :     {
382 : olson 1.20 my(undef, $ts, $author, $func) = @{$assignments->{$peg}};
383 :    
384 :     #
385 :     # Sort the existing annotations for this peg by date.
386 :     #
387 : olson 1.26 # Recall that this list has entries [$peg, $timestamp, $author, $anno, $old_flag]
388 : olson 1.20 #
389 :    
390 :     my @eannos;
391 :     if (ref($assignment_annos{$peg}))
392 :     {
393 :     @eannos = sort { $b->[1] <=> $a->[1] } @{$assignment_annos{$peg}};
394 :     }
395 :     else
396 :     {
397 :     #
398 :     # No assignment annotations found.
399 :     #
400 :     @eannos = ();
401 :     }
402 :    
403 :     # print "Assignment annos for $peg: ", Dumper(\@eannos);
404 : olson 1.19
405 :     #
406 : olson 1.20 # Filter out just the master assignments that are newer than
407 :     # the one we are contemplating putting in place.
408 : olson 1.19 #
409 :    
410 : olson 1.20 my @cand = grep {
411 :     ($_->[1] > $ts) and ($_->[3] =~ /Set master function to/)
412 :     } @eannos;
413 :    
414 :     if (@cand > 0)
415 :     {
416 :     #
417 :     # Here is were some policy needs to be put in place --
418 :     # we have a more recent annotation on the current system.
419 :     #
420 :     # For now, we will not install an assignment if there is any
421 :     # newer assignment in place.
422 :     #
423 :    
424 :     warn "Skipping assignment for $peg $func due to more recent assignment $cand[0]->[3]\n";
425 : olson 1.22 print $log_fh "Skipping assignment for $peg $func due to more recent assignment $cand[0]->[3]\n";
426 : olson 1.20 }
427 :     else
428 :     {
429 :     #
430 :     # Nothing is blocking us. While we are testing, just slam this assignment in.
431 :     #
432 : olson 1.19
433 : olson 1.20 my $old = $fig->function_of($peg, 'master');
434 :    
435 : olson 1.25 if ($old ne $func and &$allow_assignment($peg, $ts, $author, $func))
436 : olson 1.21 {
437 : olson 1.23 my $l = "$cgi_url/protein.cgi?prot=$peg";
438 :     print $html_fh "<tr><td><a href=\"$l\">$peg</a></td><td>$old</td><td>$func</td></tr>\n";
439 :    
440 : olson 1.21 print "Assign $peg $func\n";
441 : olson 1.22 print $log_fh "Assign $peg $func\n";
442 :     print $log_fh " was $old\n";
443 : olson 1.21 $fig->assign_function($peg, 'master', $func);
444 : olson 1.23
445 : olson 1.21 }
446 : olson 1.20 }
447 : olson 1.19 }
448 :    
449 : olson 1.23 print $html_fh "</table>\n";
450 :    
451 :     print $html_fh "<h2>Annotations added</h2>\n";
452 : olson 1.25 print $html_fh "<table border=\"1\">\n";
453 : olson 1.23 print $html_fh "<tr><th>PEG</th><th>Time</th><th>Author</th><th>Annotation</th></tr>\n";
454 :    
455 : olson 1.19 open(my $outfh, ">$anno_file") or die "Cannot open new annotation file $anno_file: $!\n";
456 :    
457 :     my $last;
458 :     my @sorted = sort { ($a->[0] cmp $b->[0]) or ($a->[1] <=> $b->[1]) } @annos;
459 :     my $inst = 0;
460 :     my $dup = 0;
461 :     foreach my $ann (@sorted)
462 :     {
463 : olson 1.29 my $txt = join("\n", @$ann[0..3]);
464 : olson 1.19 #
465 :     # Drop the trailing \n if there is one; we will add it back when we print and
466 :     # want to ensure the file format remains sane.
467 :     #
468 :     chomp $txt;
469 :     if ($txt ne $last)
470 :     {
471 : olson 1.23 my $peg = $ann->[0];
472 :     my $l = "$cgi_url/protein.cgi?prot=$peg";
473 : olson 1.26 if (!$ann->[4])
474 :     {
475 :     print $html_fh "<tr>" . join("\n", map { "<td>$_</td>" }
476 :     "<a href=\"$l\">$peg</a>",
477 :     scalar(localtime($ann->[1])), $ann->[2], $ann->[3])
478 :     . "</tr>\n";
479 :     }
480 : olson 1.23
481 : olson 1.19 print $outfh "$txt\n//\n";
482 :     $last = $txt;
483 : olson 1.21 # print "Inst $ann->[0] $ann->[1] $ann->[2]\n";
484 : olson 1.19 $inst++;
485 :     }
486 :     else
487 :     {
488 : olson 1.21 # print "Dup $ann->[0] $ann->[1] $ann->[2]\n";
489 : olson 1.19 $dup++;
490 :     }
491 :     }
492 : olson 1.23 print $html_fh "</table>\n";
493 : olson 1.19 close($outfh);
494 :     chmod(0666, $anno_file) or warn "Cannot chmod 0666 $anno_file: $!\n";
495 :     print "Wrote $anno_file. $new_count new annos, $inst installed, $dup duplicates\n";
496 : olson 1.22 print $log_fh "Wrote $anno_file. $new_count new annos, $inst installed, $dup duplicates\n";
497 : olson 1.15 }
498 : olson 1.23 close($html_fh);
499 : olson 1.1 }
500 :    
501 : olson 1.21 #
502 :     # Compute the peg mapping for a session.
503 :     #
504 :     # $fig Active FIG instance
505 :     # $peer P2P peer for this session.
506 :     # $session P2P session ID
507 :     # $peg_mapping Hash ref for the remote -> local PEG mapping
508 :     # $peg_cache Hash ref for the persistent remote -> local PEG mapping cache db.
509 : olson 1.24 # $cache_handle AnyDBM_File handle corresponding to $peg_cache.
510 : olson 1.21 #
511 :     sub _compute_peg_mapping
512 :     {
513 :     my($fig, $peer, $session, $num_pegs, $peg_mapping, $peg_cache, $cache_handle, $skip_tough_search) = @_;
514 :    
515 :     #
516 :     # genome_map is a hash mapping from target genome id to a list of
517 :     # pegs on the target. This is used to construct a finalize_pegs request after
518 :     # the first phase of peg mapping.
519 :     #
520 :    
521 :     my %genome_map;
522 :    
523 :     #
524 :     # target_genome_info is a hash mapping from target genome
525 :     # identifier to the target-side information on the genome -
526 :     # number of contigs, number of nucleotides, checksum.
527 :     #
528 :     # We accumulate it here across possibly multiple batches of
529 :     # peg retrievals in order to create a single finalization
530 :     # list.
531 :     #
532 :    
533 :     my %target_genome_info;
534 :    
535 :     #
536 :     # For very large transfers, we need to batch the peg processing.
537 :     #
538 :    
539 :     for (my $peg_start = 0; $peg_start < $num_pegs; $peg_start += $peg_batch_size)
540 :     {
541 :     my $peg_req_len = $num_pegs - $peg_start;
542 :     $peg_req_len = $peg_batch_size if $peg_req_len > $peg_batch_size;
543 :    
544 :     print "Getting $peg_req_len pegs at $peg_start\n";
545 : olson 1.22 print $log_fh "Getting $peg_req_len pegs at $peg_start\n";
546 : olson 1.21 my $ret = $peer->get_pegs($session, $peg_start, $peg_req_len);
547 :    
548 :     if (!$ret or ref($ret) ne "ARRAY")
549 :     {
550 :     die "perform_update: get_pegs failed\n";
551 :     }
552 :    
553 :     my($peg_list, $genome_list) = @$ret;
554 :    
555 :     for my $gent (@$genome_list)
556 :     {
557 :     $target_genome_info{$gent->[0]} = $gent;
558 :     }
559 :    
560 :     _compute_peg_mapping_batch($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
561 :     $peg_list, \%genome_map);
562 :     }
563 :    
564 :     #
565 :     # We have finished first pass. Now go over the per-genome mappings that need to be made.
566 :     #
567 :     # $genome_map{$genome_id} is a list of pegs that reside on that genome.
568 :     # The pegs and genome id are both target-based identifiers.
569 :     #
570 :     # %target_genome_info defines the list of genome information we have on the remote
571 :     # side.
572 :     #
573 :     # We build a request to be passed to finalize_pegs. Each entry in the request is either
574 :     # ['peg_genome', $peg] which means that we have a genome that corresponds to the
575 :     # genome the peg is in. We can attempt to map via contig locations.
576 :     #
577 :     # If that is not the case, we pass a request entry of ['peg_unknown', $peg]
578 :     # which will result in the sequence data being returned.
579 :     #
580 :    
581 :     my @finalize_req = ();
582 :    
583 :     #
584 :     # local_genome maps a target peg identifier to the local genome id it translates to.
585 :     #
586 :     my %local_genome;
587 :    
588 :     for my $genome (keys(%target_genome_info))
589 :     {
590 :     my($tg, $n_contigs, $n_nucs, $cksum) = @{$target_genome_info{$genome}};
591 :    
592 :     $tg eq $genome or die "Invalid entry in target_genome_info for $genome => $tg, $n_contigs, $n_nucs, $cksum";
593 :    
594 :     #
595 :     # Don't bother unless we have any pegs to look up.
596 :     #
597 :     next unless defined($genome_map{$genome});
598 :    
599 :     #
600 :     # Determine if we have a local genome installed that matches precisely the
601 :     # genome on the target side.
602 :     #
603 :     my $my_genome = $fig->find_genome_by_content($genome, $n_contigs, $n_nucs, $cksum);
604 :    
605 :     my $pegs = $genome_map{$genome};
606 :    
607 :     if ($my_genome)
608 :     {
609 :     #
610 :     # We do have such a local genome. Generate a peg_genome request to
611 :     # get the location information from the target side.
612 :     #
613 :     # Also remember the local genome mapping for this peg.
614 :     #
615 :    
616 :     print "$genome mapped to $my_genome\n";
617 : olson 1.22 print $log_fh "$genome mapped to $my_genome\n";
618 : olson 1.21 for my $peg (@$pegs)
619 :     {
620 :     push(@finalize_req, ['peg_genome', $peg]);
621 :     $local_genome{$peg} = $my_genome;
622 :     }
623 :    
624 :     }
625 :     else
626 :     {
627 :     #
628 :     # We don't have such a genome. We need to retrieve the
629 :     # sequence data in order to finish mapping.
630 :     #
631 :     push(@finalize_req, map { ['peg_unknown', $_] } @$pegs);
632 :     }
633 :     }
634 :    
635 :     #
636 :     # We've built our finalization request. Handle it (possibly with batching here too).
637 :     #
638 :    
639 :     _process_finalization_request($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
640 :     \%local_genome, \@finalize_req, $skip_tough_search);
641 :    
642 :     }
643 :    
644 :     #
645 :     # Process one batch of PEGs.
646 :     #
647 :     # Same args as _compute_peg_mapping, with the addition of:
648 :     #
649 :     # $peg_list List of pegs to be processed
650 :     # $genome_map Hash maintaining list of genomes with their pegs.
651 :     # $target_genome_info Hash maintaining overall list of target-side genome information.
652 :     #
653 :     sub _compute_peg_mapping_batch
654 :     {
655 :     my($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
656 :     $peg_list, $genome_map, $target_genome_info) = @_;
657 :    
658 :     #
659 :     # Walk the list of pegs as returned from get_pegs() and determine what has to
660 :     # be done.
661 :     #
662 :     # If the entry is ['peg', $peg], we can use the peg ID as is.
663 :     #
664 :     # If the entry is ['peg_info', $peg, $alias_list, $genome], the peg
665 :     # has the given aliases, and is in the given genome.
666 :     #
667 :     for my $peg_info (@$peg_list)
668 :     {
669 :     my($key, $peg, @rest) = @$peg_info;
670 :    
671 :     if ($key eq 'peg')
672 :     {
673 :     #
674 :     # Peg id is directly usable.
675 :     #
676 :     $peg_mapping->{$peg} = $peg;
677 :     }
678 :     elsif ($key eq 'peg_info')
679 :     {
680 :     #
681 :     # Peg id not directly usable. See if we have it in the cache.
682 :     #
683 :    
684 :     if ((my $cached = $peg_cache->{$peg}) ne "")
685 :     {
686 :     #
687 :     # Cool, we've cached the result. Use it.
688 :     #
689 :    
690 :     $peg_mapping->{$peg} = $cached;
691 :     # warn "Found cached mapping $peg => $cached\n";
692 :     next;
693 :     }
694 :    
695 :     #
696 :     # It is not cached. Attempt to resolve by means of alias IDs.
697 :     #
698 :    
699 :     my($alias_list, $genome_id) = @rest;
700 :    
701 :     for my $alias (@$alias_list)
702 :     {
703 :     my $mapped = $fig->by_alias($alias);
704 :     if ($mapped)
705 :     {
706 :     print "$peg maps to $mapped via $alias\n";
707 : olson 1.22 print $log_fh "$peg maps to $mapped via $alias\n";
708 : olson 1.21 $peg_mapping->{$peg}= $mapped;
709 :     $peg_cache->{$peg} = $mapped;
710 :     last;
711 :     }
712 :     }
713 :    
714 :     #
715 :     # If we weren't able to resolve by ID,
716 :     # add to %genome_map as a PEG that will need
717 :     # to be resolved by means of contig location.
718 :     #
719 :    
720 :     if (!defined($peg_mapping->{$peg}))
721 :     {
722 :     push(@{$genome_map->{$genome_id}}, $peg);
723 :     print "$peg did not map on first pass\n";
724 : olson 1.22 print $log_fh "$peg did not map on first pass\n";
725 : olson 1.21 }
726 :     }
727 :     }
728 :    
729 :     #
730 :     # Flush the cache to write out any computed mappings.
731 :     #
732 : olson 1.24 eval { $cache_handle->sync();};
733 : olson 1.21
734 :     }
735 :    
736 :     sub _process_finalization_request
737 :     {
738 :     my($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
739 :     $local_genome, $finalize_req, $skip_tough_search) = @_;
740 :    
741 :     #
742 :     # Immediately return unless there's something to do.
743 :     #
744 :     return unless ref($finalize_req) and @$finalize_req > 0;
745 : olson 1.1
746 : olson 1.21 while (@$finalize_req > 0)
747 :     {
748 :     my @req = splice(@$finalize_req, 0, $fin_batch_size);
749 :    
750 :     print "Invoking finalize_pegs on ", int(@req), " pegs\n";
751 : olson 1.22 print $log_fh "Invoking finalize_pegs on ", int(@req), " pegs\n";
752 : olson 1.21 my $ret = $peer->finalize_pegs($session, \@req);
753 :    
754 :     if (!$ret or ref($ret) ne "ARRAY")
755 :     {
756 :     die "perform_update: finalize_pegs failed\n";
757 :     }
758 :    
759 :     #
760 :     # The return is a list of either location entries or
761 :     # sequence data. Attempt to finish up the mapping.
762 :     #
763 :    
764 :     my(%sought, %sought_seq);
765 :    
766 :    
767 :     my $dbh = $fig->db_handle();
768 :     for my $entry (@$ret)
769 :     {
770 :     my($what, $peg, @rest) = @$entry;
771 :    
772 :     if ($what eq "peg_loc")
773 :     {
774 :     my($strand, $start, $end, $cksum, $seq) = @rest;
775 :    
776 :     #
777 :     # We have a contig location. Try to find a matching contig
778 :     # here, and see if it maps to something.
779 :     #
780 :    
781 :     my $my_genome = $local_genome->{$peg};
782 :     my $local_contig = $fig->find_contig_with_checksum($my_genome, $cksum);
783 :     if ($local_contig)
784 :     {
785 :     #
786 :     # Now look up the local peg. We match on the end location; depending on the strand
787 :     # the feature is on, we want to look at either minloc or maxloc.
788 :     #
789 :    
790 : olson 1.27 my($start_loc, $end_loc);
791 :    
792 :     if ($strand eq '-')
793 :     {
794 :     $start_loc = 'maxloc';
795 :     $end_loc = 'minloc';
796 :     }
797 :     else
798 :     {
799 :     $start_loc = 'minloc';
800 :     $end_loc = 'maxloc';
801 :     }
802 : olson 1.21
803 : olson 1.27 my $res = $dbh->SQL(qq!SELECT id, $start_loc from features
804 :     WHERE $end_loc = $end and genome = '$my_genome' and
805 : olson 1.21 contig = '$local_contig'
806 :     !);
807 :    
808 :     if ($res and @$res > 0)
809 :     {
810 : olson 1.27 my $id;
811 :     if (@$res == 1)
812 :     {
813 :     #
814 :     # Found a unique mapping.
815 :     #
816 :     $id = $res->[0]->[0];
817 :     }
818 :     else
819 :     {
820 :     #
821 :     # Multiple mappings found. See if one matches the
822 :     # start location. If it doesn't, pick the one that
823 :     # is closest in length.
824 :     #
825 :    
826 :     my @lens;
827 :    
828 :     for my $res_ent (@$res)
829 :     {
830 :     my($rid, $rloc) = @$res_ent;
831 :    
832 :     push(@lens, [$rid, abs($rloc - $end - ($start - $end))]);
833 :     warn "Matching $rid $rloc to $start\n";
834 :     if ($rloc == $start)
835 :     {
836 :     $id = $rid;
837 :     warn "Matched $rid\n";
838 :     last;
839 :     }
840 :     }
841 :    
842 :     if (!$id)
843 :     {
844 :     my @slens = sort { $a->[1] <=> $b->[1]} @lens;
845 :     my $len;
846 :     ($id, $len) = @{$slens[0]};
847 :     warn "No unique match found, picking closest match $id (len=$len)\n";
848 :     }
849 :     }
850 :    
851 : olson 1.21 $peg_mapping->{$peg} = $id;
852 :     $peg_cache->{$peg} = $id;
853 :     print "Mapped $peg to $id via contigs\n";
854 :     }
855 :     else
856 :     {
857 :     print "failed: $peg $my_genome and contig $local_contig start=$start end=$end strand=$strand\n";
858 : olson 1.22 print $log_fh "failed: $peg $my_genome and contig $local_contig start=$start end=$end strand=$strand\n";
859 : olson 1.23 print $html_fh "Contig match failed: $peg $my_genome contig $local_contig start $start end $end strand $strand<br>\n";
860 : olson 1.21 $sought{$peg}++;
861 :     $sought_seq{$peg} = $seq;
862 :     }
863 :     }
864 :     else
865 :     {
866 :     print "Mapping failed for $my_genome checksum $cksum\n";
867 : olson 1.22 print $log_fh "Mapping failed for $my_genome checksum $cksum\n";
868 : olson 1.23 print $html_fh "Mapping failed for $my_genome checksum $cksum<br>\n";
869 : olson 1.21 $sought{$peg}++;
870 :     $sought_seq{$peg} = $seq;
871 :     }
872 :     }
873 :     elsif ($what eq "peg_seq")
874 :     {
875 :     my($seq) = @rest;
876 :    
877 :     $sought{$peg}++;
878 :     $sought_seq{$peg} = $seq;
879 :     }
880 :     }
881 :    
882 :     #
883 :     # Now see if we need to do a tough search.
884 :     #
885 :    
886 :     if (keys(%sought) > 0 and !$skip_tough_search)
887 :     {
888 :     my %trans;
889 :    
890 :     print "Starting tough search\n";
891 : olson 1.22 print $log_fh "Starting tough search\n";
892 : olson 1.21
893 :     $fig->tough_search(undef, \%sought_seq, \%trans, \%sought);
894 :     print "Tough search translated: \n";
895 : olson 1.22 print $log_fh "Tough search translated: \n";
896 : olson 1.21 while (my($tpeg, $ttrans) = each(%trans))
897 :     {
898 :     print " $tpeg -> $ttrans\n";
899 : olson 1.22 print $log_fh " $tpeg -> $ttrans\n";
900 : olson 1.21 $peg_mapping->{$tpeg} = $ttrans;
901 :     $peg_cache->{$tpeg} = $ttrans;
902 :     }
903 :     }
904 :     }
905 :     }
906 : olson 1.15
907 : olson 1.1 #############
908 :     #
909 :     # P2P Relay
910 :     #
911 :     #############
912 :    
913 :    
914 :     package P2P::Relay;
915 :     use strict;
916 :    
917 :     use Data::Dumper;
918 :     use SOAP::Lite;
919 :    
920 :     use P2P;
921 :    
922 :     sub new
923 :     {
924 :     my($class, $url) = @_;
925 :    
926 : olson 1.25 my $creds = [];
927 : olson 1.24
928 :     my $proxy = SOAP::Lite->uri($P2P::ns_relay)->proxy([$url,
929 :     credentials => $creds]);
930 : olson 1.1
931 :     my $self = {
932 :     url => $url,
933 :     proxy => $proxy,
934 :     };
935 :     return bless($self, $class);
936 :     }
937 :    
938 :     sub enumerate_annotation_systems
939 :     {
940 :     my($self) = @_;
941 :    
942 :     return $self->{proxy}->enumerate_annotation_systems()->result;
943 :     }
944 :    
945 :     sub fetch_queries
946 :     {
947 :     my($self, $id) = @_;
948 :    
949 :     my $reply = $self->{proxy}->fetch_queries($id);
950 :    
951 :     if ($reply->fault)
952 :     {
953 :     print "Failed to fetch queries: ", $reply->faultcode, " ", $reply->faultstring, "\n";
954 :     return undef;
955 :     }
956 :    
957 :     return $reply->result;
958 :     }
959 :    
960 :     sub deposit_answer
961 :     {
962 :     my($self, $id, $key, $answer) = @_;
963 :    
964 :     my $reply = $self->{proxy}->deposit_answer($id, $key,
965 :     SOAP::Data->type('base64')->value($answer));
966 :    
967 :     if ($reply->fault)
968 :     {
969 :     print "deposit_answer got fault: ", $reply->faultcode, " ", $reply->faultstring, "\n";
970 :     return undef;
971 :     }
972 :    
973 :     return $reply;
974 :     }
975 :    
976 :     =pod
977 :    
978 :     =head1 await_result
979 :    
980 :     Await the result from a possibly-asynchronous soap request.
981 :    
982 :     Look at the reply that we have. If it's a deferred reply, loop polling
983 :     the relay for the actual result.
984 :    
985 :     We determine if the reply is a deferred reply by examining the namespace
986 :     URI of the response. A response will be generated from the relay's namespace,
987 :     rather than that of the application itself.
988 :    
989 :     =cut
990 :    
991 :     sub await_result
992 :     {
993 :     my($self, $reply) = @_;
994 :    
995 :     while (1)
996 :     {
997 :     #
998 :     # Retrieve the namespace of the response, which is the first
999 :     # element in the body of the message.
1000 :     #
1001 :     my $ns = $reply->namespaceuriof('/Envelope/Body/[1]');
1002 : olson 1.20 # print "Reply ns=$ns want $P2P::ns_relay\n";
1003 : olson 1.1
1004 :     if ($ns eq $P2P::ns_relay)
1005 :     {
1006 :     my $val = $reply->result;
1007 : olson 1.20 # print "got val=", Dumper($val);
1008 : olson 1.1 if ($val->[0] eq 'deferred')
1009 :     {
1010 :     #
1011 :     # Sleep a little, then try to retrieve the response.
1012 :     #
1013 :    
1014 :     sleep(1);
1015 :     my $id = $val->[1];
1016 :    
1017 :     print "Retrieving reply\n";
1018 :     $reply = $self->{proxy}->call_completed($id);
1019 :     }
1020 :     else
1021 :     {
1022 :     #
1023 :     # We're not sure what to do here..
1024 :     #
1025 :     return undef;
1026 :     }
1027 :     }
1028 :     else
1029 :     {
1030 :     #
1031 :     # We got an actual response. Return it.
1032 :     #
1033 :    
1034 :     return $reply;
1035 :     }
1036 :     }
1037 :     }
1038 :    
1039 :     #############
1040 :     #
1041 :     # P2P Requestor
1042 :     #
1043 :     #############
1044 :    
1045 :     package P2P::Requestor;
1046 :     use strict;
1047 :    
1048 :     use Data::Dumper;
1049 : olson 1.15 use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
1050 : olson 1.1
1051 :     use SOAP::Lite;
1052 : olson 1.15
1053 :     #use SOAP::Lite +trace => [qw(transport dispatch result debug)];
1054 : olson 1.1 use P2P;
1055 :    
1056 :     #
1057 :     # Create a new Requestor. It contains a reference to the FIG instance
1058 :     # so that we can run the protocol completely from in here.
1059 :     #
1060 :    
1061 :     sub new
1062 :     {
1063 : olson 1.24 my($class, $fig, $url, $peer_id, $relay, $credentials) = @_;
1064 : olson 1.1
1065 : olson 1.24 $credentials = [] unless ref($credentials);
1066 :    
1067 : olson 1.27 my $proxy = SOAP::Lite->uri($ns_p2p)->proxy($url, timeout => 3600);
1068 :    
1069 :     for my $cred (@$credentials)
1070 :     {
1071 :     $proxy->transport->credentials(@$cred);
1072 :     }
1073 : olson 1.1
1074 :     my $self = {
1075 :     fig => $fig,
1076 :     url => $url,
1077 :     peer_id => $peer_id,
1078 :     proxy => $proxy,
1079 :     relay => $relay,
1080 :     };
1081 :     return bless($self, $class);
1082 :     }
1083 :    
1084 :     #
1085 :     # First step: Request an update.
1086 :     #
1087 :     # We need to determine some notion of what our release is, since we are not
1088 :     # currently tagging them explicitly. Until we delve into this more,
1089 :     # I am going to return a null release, which means the same-release
1090 :     # optimization won't be able to kick in.
1091 :     #
1092 :     # We also need to determine the last time we got an update from this
1093 :     # system.
1094 :     #
1095 :    
1096 :     sub request_update
1097 :     {
1098 : olson 1.20 my($self, $last_update, $update_thru) = @_;
1099 : olson 1.1
1100 : olson 1.18 my $rel = [$self->{fig}->get_release_info()];
1101 : olson 1.1
1102 :     if (!defined($last_update))
1103 :     {
1104 :     $last_update = $self->{fig}->get_peer_last_update($self->{peer_id});
1105 :     }
1106 : olson 1.17
1107 :     print "Requesting update via $self->{proxy}\n";
1108 : olson 1.20 my $reply = $self->{proxy}->request_update($rel, $last_update, $update_thru);
1109 :     # print "Got reply ", Dumper($reply);
1110 : olson 1.1
1111 :     if ($self->{relay})
1112 :     {
1113 :     $reply = $self->{relay}->await_result($reply);
1114 :     }
1115 :    
1116 :     if ($reply->fault)
1117 :     {
1118 :     print "request_update triggered fault: ", $reply->faultcode, " ", $reply->faultstring, "\n";
1119 :     return undef;
1120 :     }
1121 :    
1122 :     return $reply->result;
1123 :     }
1124 :    
1125 :     =pod
1126 :    
1127 :     =head1 get_pegs($session_id, $start, $length)
1128 :    
1129 :    
1130 :     =cut
1131 :    
1132 :     sub get_pegs
1133 :     {
1134 :     my($self, $session_id, $start, $length) = @_;
1135 :    
1136 :     return $self->call("get_pegs", $session_id, $start, $length);
1137 :     }
1138 :    
1139 : olson 1.6 sub finalize_pegs
1140 :     {
1141 :     my($self, $session_id, $request) = @_;
1142 :    
1143 :     return $self->call("finalize_pegs", $session_id, $request);
1144 :     }
1145 :    
1146 : olson 1.15 sub get_annotations
1147 :     {
1148 :     my($self, $session_id, $start, $length) = @_;
1149 :    
1150 :     return $self->call("get_annotations", $session_id, $start, $length);
1151 :     }
1152 :    
1153 : olson 1.19 sub get_assignments
1154 :     {
1155 :     my($self, $session_id, $start, $length) = @_;
1156 :    
1157 :     return $self->call("get_assignments", $session_id, $start, $length);
1158 :     }
1159 :    
1160 : olson 1.1 sub call
1161 :     {
1162 :     my($self, $func, @args) = @_;
1163 : olson 1.15
1164 :     my $t0 = [gettimeofday()];
1165 :     print "Calling $func\n";
1166 : olson 1.1 my $reply = $self->{proxy}->$func(@args);
1167 : olson 1.15 my $t1 = [gettimeofday()];
1168 :    
1169 :     my $elap = tv_interval($t0, $t1);
1170 :     print "Call to $func took $elap\n";
1171 : olson 1.1
1172 :     if ($self->{relay})
1173 :     {
1174 :     $reply = $self->{relay}->await_result($reply);
1175 :     }
1176 :    
1177 :     if ($reply->fault)
1178 :     {
1179 :     print "$func triggered fault: ", $reply->faultcode, " ", $reply->faultstring, "\n";
1180 :     return undef;
1181 :     }
1182 :    
1183 :     return $reply->result;
1184 :     }
1185 :    
1186 :    
1187 :     #############
1188 :     #
1189 :     # P2P Service
1190 :     #
1191 :     # Code in this module is invoked on the target on behalf of a requestor.
1192 :     #
1193 :     #############
1194 :    
1195 :     package P2P::Service;
1196 :    
1197 :     use Data::Dumper;
1198 :    
1199 :     use FIG;
1200 :     use FIG_Config;
1201 :     use strict;
1202 :    
1203 :     use File::Temp qw(tempdir);
1204 :     use File::Basename;
1205 :    
1206 :     sub request_update
1207 :     {
1208 : olson 1.20 my($class, $his_release, $last_update, $update_thru)= @_;
1209 : olson 1.1
1210 :     #
1211 :     # Verify input.
1212 :     #
1213 :    
1214 :     if ($last_update !~ /^\d+$/)
1215 :     {
1216 :     die "request_update: last_update must be a number (not '$last_update')\n";
1217 :     }
1218 :    
1219 : olson 1.20 if ($update_thru eq "")
1220 :     {
1221 :     $update_thru = time + 10000;
1222 :     }
1223 :    
1224 : olson 1.1 #
1225 :     # Create a new session id and a spool directory to use for storage
1226 :     # of information about it. This can go in the tempdir since it is
1227 :     # not persistent.
1228 :     #
1229 :    
1230 :     &FIG::verify_dir("$FIG_Config::temp/p2p_spool");
1231 : olson 1.22 my $spool_dir = tempdir(DIR => "$FIG_Config::temp/p2p_spool");
1232 : olson 1.1
1233 : olson 1.22 #my $spool_dir = "$FIG_Config::temp/p2p_spool/test";
1234 : olson 1.1 &FIG::verify_dir($spool_dir);
1235 :    
1236 :     my $session_id = basename($spool_dir);
1237 :     my $now = time;
1238 :    
1239 :     #
1240 :     # Gather the list of pegs and annotations for the update.
1241 :     #
1242 :    
1243 :     my $fig = new FIG;
1244 :    
1245 :     my $all_genomes = [$fig->genomes];
1246 :    
1247 :     my %all_genomes = map { $_ => 1 } @$all_genomes;
1248 :    
1249 :     my %pegs;
1250 : olson 1.15
1251 :     #
1252 :     # We keep track of usernames that have been seen, so that
1253 :     # we can both update our local user database and
1254 :     # we can report them to our peer.
1255 :     #
1256 :    
1257 :     my %users;
1258 : olson 1.1
1259 :     my $num_annos = 0;
1260 :     my $num_genomes = 0;
1261 :     my $num_pegs = 0;
1262 : olson 1.15 my $num_assignments = 0;
1263 : olson 1.1
1264 :     my $anno_fh;
1265 :     open($anno_fh, ">$spool_dir/annos");
1266 :    
1267 :     my $peg_fh;
1268 :     open($peg_fh, ">$spool_dir/pegs");
1269 :    
1270 :     my $genome_fh;
1271 :     open($genome_fh, ">$spool_dir/genomes");
1272 :    
1273 : olson 1.15 my $assign_fh;
1274 :     open($assign_fh, ">$spool_dir/assignments");
1275 :    
1276 : olson 1.25 #
1277 :     # We originally used a query to get the PEGs that needed to have annotations
1278 :     # sent. Unfortunately, this performed very poorly due to all of the resultant
1279 :     # seeking around in the annotations files.
1280 :     #
1281 :     # The code below just runs through all of the anno files looking for annos.
1282 :     #
1283 :     # A better way to do this would be to do a query to retrieve the genome id's for
1284 :     # genomes that have updates. The problem here is that the annotation_seeks
1285 :     # table doesn't have an explicit genome field.
1286 :     #
1287 :     # Surprisingly, to me anyway, the following query appers to run quickly, in both
1288 :     # postgres and mysql:
1289 :     #
1290 :     # SELECT distinct(substring(fid from 5 for position('.peg.' in fid) - 5))
1291 :     # FROM annotation_seeks
1292 :     # WHERE dateof > some-date.
1293 :     #
1294 :     # The output of that can be parsed to get the genome id and just those
1295 :     # annotations files searched.
1296 :     #
1297 :    
1298 : olson 1.1 for my $genome (@$all_genomes)
1299 :     {
1300 :     my $num_annos_for_genome = 0;
1301 : olson 1.15 my %assignment;
1302 : olson 1.1
1303 :     my $genome_dir = "$FIG_Config::organisms/$genome";
1304 :     next unless -d $genome_dir;
1305 :    
1306 :     my $afh;
1307 :     if (open($afh, "$genome_dir/annotations"))
1308 :     {
1309 :     my($fid, $anno_time, $who, $anno_text);
1310 :     local($/);
1311 :     $/ = "//\n";
1312 :     while (my $ann = <$afh>)
1313 :     {
1314 :     chomp $ann;
1315 :    
1316 :     if ((($fid, $anno_time, $who, $anno_text) =
1317 :     ($ann =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\n(.*\S)/s)) and
1318 : olson 1.20 $anno_time > $last_update and
1319 :     $anno_time < $update_thru)
1320 : olson 1.1
1321 :     {
1322 :     #
1323 : olson 1.15 # Update users list.
1324 :     #
1325 :    
1326 :     $users{$who}++;
1327 :    
1328 :     #
1329 : olson 1.1 # Look up aliases if we haven't seen this fid before.
1330 :     #
1331 :    
1332 :     if (!defined($pegs{$fid}))
1333 :     {
1334 :     my @aliases = $fig->feature_aliases($fid);
1335 :    
1336 :     print $peg_fh join("\t", $fid, $genome, @aliases), "\n";
1337 :     $num_pegs++;
1338 :     }
1339 :    
1340 :     print $anno_fh "$ann//\n";
1341 :    
1342 :     $pegs{$fid}++;
1343 :    
1344 :     $num_annos_for_genome++;
1345 :     $num_annos++;
1346 : olson 1.15
1347 :     #
1348 :     # While we're here, see if this is an assignment. We check in the
1349 :     # %assignment hash, which is keyed on fid, to see if we already
1350 :     # saw an assignment for this fid. If we have, we keep this one only if
1351 :     # the assignment time on it is later than the one we saw already.
1352 :     #
1353 :     # We are only looking at master assignments for now. We will need
1354 :     # to return to this issue and reexamine it, but in order to move
1355 :     # forward I am only matching master assignments.
1356 :     #
1357 :    
1358 :     if ($anno_text =~ /Set master function to\n(\S[^\n]+\S)/)
1359 :     {
1360 :     my $func = $1;
1361 :    
1362 :     my $other = $assignment{$fid};
1363 :    
1364 :     #
1365 :     # If we haven't seen an assignment for this fid,
1366 :     # or if it the other assignment has a timestamp that
1367 :     # is earlier than this one, set the assignment.
1368 :     #
1369 :    
1370 :     if (!defined($other) or
1371 :     ($other->[1] < $anno_time))
1372 :     {
1373 :     $assignment{$fid} = [$fid, $anno_time, $who, $func];
1374 :     }
1375 :     }
1376 : olson 1.1 }
1377 :     }
1378 :     close($afh);
1379 : olson 1.15
1380 :     #
1381 :     # Write out the assignments that remain.
1382 :     #
1383 :    
1384 :     for my $fid (sort keys(%assignment))
1385 :     {
1386 :     print $assign_fh join("\t", @{$assignment{$fid}}), "\n";
1387 :     $num_assignments++;
1388 :     }
1389 : olson 1.1 }
1390 : olson 1.15
1391 : olson 1.1
1392 :     #
1393 :     # Determine genome information if we have annotations for this one.
1394 :     #
1395 :    
1396 :     if ($num_annos_for_genome > 0)
1397 :     {
1398 :     $num_genomes++;
1399 :     if (open(my $cfh, "<$genome_dir/COUNTS"))
1400 :     {
1401 :     if ($_ = <$cfh>)
1402 :     {
1403 :     chomp;
1404 :     my($cgenome, $n_contigs, $total_nucs, $cksum) = split(/\t/, $_);
1405 :     if ($cgenome ne $genome)
1406 :     {
1407 :     warn "Hm, $genome has a COUNTS file with genome=$cgenome that does not match\n";
1408 :     }
1409 :     else
1410 :     {
1411 :     print $genome_fh join("\t",
1412 :     $genome, $num_annos_for_genome, $n_contigs,
1413 :     $total_nucs, $cksum), "\n";
1414 :     }
1415 :     }
1416 :     }
1417 :     }
1418 :    
1419 :     }
1420 :     close($anno_fh);
1421 :     close($peg_fh);
1422 :     close($genome_fh);
1423 : olson 1.15 close($assign_fh);
1424 : olson 1.1
1425 :     print "Pegs: $num_pegs\n";
1426 :     print "Genomes: $num_genomes\n";
1427 :     print "Annos: $num_annos\n";
1428 :    
1429 :     #
1430 :     # Check compatibility.
1431 :     #
1432 :    
1433 : olson 1.18 my $my_release = [$fig->get_release_info()];
1434 :    
1435 :     #
1436 :     # Release id is $my_release->[1].
1437 :     #
1438 :    
1439 :     my $compatible;
1440 :     if ($my_release->[1] ne "" and $his_release->[1] ne "")
1441 :     {
1442 :     #
1443 :     # Both releases must be defined for them to be compatible.
1444 :     #
1445 :     # At some point we need to consider the derived-release issue.
1446 :     #
1447 :    
1448 :     $compatible = $my_release->[1] eq $his_release->[1];
1449 :     }
1450 :     else
1451 :     {
1452 :     $compatible = 0;
1453 :     }
1454 : olson 1.1
1455 :     open(my $fh, ">$spool_dir/INFO");
1456 :     print $fh "requestor_release\t$his_release\n";
1457 :     print $fh "last_update\t$last_update\n";
1458 : olson 1.20 print $fh "update_thru\t$update_thru\n";
1459 : olson 1.1 print $fh "cur_update\t$now\n";
1460 :     print $fh "target_release\t$my_release\n";
1461 :     print $fh "compatible\t$compatible\n";
1462 :     print $fh "num_pegs\t$num_pegs\n";
1463 :     print $fh "num_genomes\t$num_genomes\n";
1464 :     print $fh "num_annos\t$num_annos\n";
1465 : olson 1.15 print $fh "num_assignments\t$num_assignments\n";
1466 : olson 1.1 close($fh);
1467 :    
1468 : olson 1.15 #
1469 :     # Construct list of users, and pdate local user database.
1470 :     #
1471 :    
1472 :     my @users = keys(%users);
1473 : olson 1.17 # $fig->ensure_users(\@users);
1474 : olson 1.15
1475 :     return [$session_id, $my_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,
1476 : olson 1.16 $now, $compatible, \@users];
1477 : olson 1.1 }
1478 :    
1479 :    
1480 :     sub get_pegs
1481 :     {
1482 :     my($self, $session_id, $start, $len) = @_;
1483 :     my(%session_info);
1484 :    
1485 :     my $spool_dir = "$FIG_Config::temp/p2p_spool/$session_id";
1486 :    
1487 :     -d $spool_dir or die "Invalid session id $session_id";
1488 :    
1489 :     #
1490 :     # Read in the cached information for this session.
1491 :     #
1492 :    
1493 :     open(my $info_fh, "<$spool_dir/INFO") or die "Cannot open INFO file: $!";
1494 :     while (<$info_fh>)
1495 :     {
1496 :     chomp;
1497 :     my($var, $val) = split(/\t/, $_, 2);
1498 :     $session_info{$var} = $val;
1499 :     }
1500 :     close($info_fh);
1501 :    
1502 :     #
1503 :     # Sanity check start and length.
1504 :     #
1505 :    
1506 :     if ($start < 0 or $start >= $session_info{num_pegs})
1507 :     {
1508 :     die "Invalid start position $start";
1509 :     }
1510 :    
1511 :     if ($len < 0 or ($start + $len - 1) >= $session_info{num_pegs})
1512 :     {
1513 :     die "Invalid length $len";
1514 :     }
1515 :    
1516 :     #
1517 :     # Open file, spin to the starting line, then start reading.
1518 :     #
1519 :    
1520 :     open(my $peg_fh, "<$spool_dir/pegs") or die "Cannot open pegs file: $!";
1521 :    
1522 :     my $peg_output = [];
1523 :     my $genome_output = [];
1524 :    
1525 :     my $peg_num = 0;
1526 :     my $genomes_to_show = [];
1527 :     my %genomes_to_show;
1528 :    
1529 :     my($fid, $genome, @aliases);
1530 :    
1531 :     while (<$peg_fh>)
1532 :     {
1533 :     next if ($peg_num < $start);
1534 :    
1535 :     last if ($peg_num > ($start + $len));
1536 :    
1537 :     chomp;
1538 :    
1539 :     #
1540 :     # OK, this is a peg to process.
1541 :     # It's easy if we're compatible.
1542 :     #
1543 :    
1544 :     ($fid, $genome, @aliases) = split(/\t/, $_);
1545 :    
1546 :     if ($session_info{compatible})
1547 :     {
1548 :     push(@$peg_output, ['peg', $fid]);
1549 :     }
1550 :     else
1551 :     {
1552 :     if (!$genomes_to_show{$genome})
1553 :     {
1554 :     push(@$genomes_to_show, $genome);
1555 :     $genomes_to_show{$genome}++;
1556 :     }
1557 :     push(@$peg_output, ['peg_info', $fid, [@aliases], $genome]);
1558 :     }
1559 :     }
1560 :     continue
1561 :     {
1562 :     $peg_num++;
1563 :     }
1564 :    
1565 :     #
1566 :     # Read the genomes file, returning information about genomes referenced
1567 :     # in the pegs returned.
1568 :     #
1569 :    
1570 :     my $n_left = @$genomes_to_show;
1571 :    
1572 :     open(my $gfh, "<$spool_dir/genomes") or die "Cannot open genomes file: $!";
1573 :     while ($n_left > 0 and $_ = <$gfh>)
1574 :     {
1575 :     chomp;
1576 :    
1577 :     my($genome, $n_annos, $n_contigs, $n_nucs, $cksum) = split(/\t/);
1578 :    
1579 :     if ($genomes_to_show{$genome})
1580 :     {
1581 :     push(@$genome_output, [$genome, $n_contigs, $n_nucs, $cksum]);
1582 :     $n_left--;
1583 :     }
1584 :     }
1585 :     close($gfh);
1586 :    
1587 :     return [$peg_output, $genome_output];
1588 :     }
1589 : olson 1.6
1590 :     sub finalize_pegs
1591 :     {
1592 :     my($self, $session, $request) = @_;
1593 :     my($out);
1594 :    
1595 :     my $fig = new FIG;
1596 :    
1597 :     #
1598 :     # Walk the request handling appropriately. This is fairly easy, as it
1599 :     # is just a matter of pulling either sequence or location/contig data.
1600 :     #
1601 :    
1602 :     for my $item (@$request)
1603 :     {
1604 :     my($what, $peg) = @$item;
1605 :    
1606 :     if ($what eq "peg_genome")
1607 :     {
1608 :     #
1609 :     # Return the location and contig checksum for this peg.
1610 :     #
1611 : olson 1.13 # We also include the sequence in case the contig mapping doesn't work.
1612 :     #
1613 : olson 1.6
1614 :     my $loc = $fig->feature_location($peg);
1615 :     my $contig = $fig->contig_of($loc);
1616 : olson 1.7 my $cksum = $fig->contig_checksum($fig->genome_of($peg), $contig);
1617 : olson 1.13 my $seq = $fig->get_translation($peg);
1618 : olson 1.6
1619 :     push(@$out, ['peg_loc', $peg,
1620 : olson 1.13 $fig->strand_of($peg),
1621 : olson 1.6 $fig->beg_of($loc), $fig->end_of($loc),
1622 : olson 1.13 $cksum, $seq]);
1623 : olson 1.6
1624 :     }
1625 : olson 1.7 elsif ($what eq "peg_unknown")
1626 : olson 1.6 {
1627 :     my $seq = $fig->get_translation($peg);
1628 :     push(@$out, ['peg_seq', $peg, $seq]);
1629 :     }
1630 :     }
1631 :     return $out;
1632 :     }
1633 :    
1634 : olson 1.15
1635 :     sub get_annotations
1636 :     {
1637 :     my($self, $session_id, $start, $len) = @_;
1638 :    
1639 :     #
1640 :     # This is now easy; just run thru the saved annotations and return.
1641 :     #
1642 :    
1643 :     my(%session_info);
1644 :    
1645 :     my $spool_dir = "$FIG_Config::temp/p2p_spool/$session_id";
1646 :    
1647 :     -d $spool_dir or die "Invalid session id $session_id";
1648 :    
1649 :     #
1650 :     # Read in the cached information for this session.
1651 :     #
1652 :    
1653 :     open(my $info_fh, "<$spool_dir/INFO") or die "Cannot open INFO file: $!";
1654 :     while (<$info_fh>)
1655 :     {
1656 :     chomp;
1657 :     my($var, $val) = split(/\t/, $_, 2);
1658 :     $session_info{$var} = $val;
1659 :     }
1660 :     close($info_fh);
1661 :    
1662 :     #
1663 :     # Sanity check start and length.
1664 :     #
1665 :    
1666 :     if ($start < 0 or $start >= $session_info{num_annos})
1667 :     {
1668 :     die "Invalid start position $start";
1669 :     }
1670 :    
1671 :     if ($len < 0 or ($start + $len - 1) >= $session_info{num_annos})
1672 :     {
1673 :     die "Invalid length $len";
1674 :     }
1675 :    
1676 :     #
1677 :     # Open file, spin to the starting line, then start reading.
1678 :     #
1679 :    
1680 :     open(my $anno_fh, "<$spool_dir/annos") or die "Cannot open annos file: $!";
1681 :    
1682 :     my $anno_output = [];
1683 :    
1684 :     my $anno_num = 0;
1685 :    
1686 :     local $/ = "//\n";
1687 :     while (<$anno_fh>)
1688 :     {
1689 :     next if ($anno_num < $start);
1690 :    
1691 :     last if ($anno_num > ($start + $len));
1692 :    
1693 :     chomp;
1694 :    
1695 :     my($id, $date, $author, $anno) = split(/\n/, $_, 4);
1696 :    
1697 :     push(@$anno_output, [$id, $date, $author, $anno]);
1698 :     }
1699 :     continue
1700 :     {
1701 :     $anno_num++;
1702 :     }
1703 :    
1704 :     return $anno_output;
1705 :     }
1706 : olson 1.19
1707 :     sub get_assignments
1708 :     {
1709 :     my($self, $session_id, $start, $len) = @_;
1710 :    
1711 :     #
1712 :     # This is now easy; just run thru the saved assignments and return.
1713 :     #
1714 :    
1715 :     my(%session_info);
1716 :    
1717 :     my $spool_dir = "$FIG_Config::temp/p2p_spool/$session_id";
1718 :    
1719 :     -d $spool_dir or die "Invalid session id $session_id";
1720 :    
1721 :     #
1722 :     # Read in the cached information for this session.
1723 :     #
1724 :    
1725 :     open(my $info_fh, "<$spool_dir/INFO") or die "Cannot open INFO file: $!";
1726 :     while (<$info_fh>)
1727 :     {
1728 :     chomp;
1729 :     my($var, $val) = split(/\t/, $_, 2);
1730 :     $session_info{$var} = $val;
1731 :     }
1732 :     close($info_fh);
1733 :    
1734 :     #
1735 :     # Sanity check start and length.
1736 :     #
1737 :    
1738 :     if ($start < 0 or $start >= $session_info{num_assignments})
1739 :     {
1740 :     die "Invalid start position $start";
1741 :     }
1742 :    
1743 :     if ($len < 0 or ($start + $len - 1) >= $session_info{num_assignments})
1744 :     {
1745 :     die "Invalid length $len";
1746 :     }
1747 :    
1748 :     #
1749 :     # Open file, spin to the starting line, then start reading.
1750 :     #
1751 :    
1752 :     open(my $assign_fh, "<$spool_dir/assignments") or die "Cannot open assignments file: $!";
1753 :    
1754 :     my $assign_output = [];
1755 :    
1756 :     my $assign_num = 0;
1757 :    
1758 :     while (<$assign_fh>)
1759 :     {
1760 :     next if ($assign_num < $start);
1761 :    
1762 :     last if ($assign_num > ($start + $len));
1763 :    
1764 :     chomp;
1765 :    
1766 :     my($id, $date, $author, $func) = split(/\t/, $_, 4);
1767 :    
1768 :     push(@$assign_output, [$id, $date, $author, $func]);
1769 :     }
1770 :     continue
1771 :     {
1772 :     $assign_num++;
1773 :     }
1774 :    
1775 :     return $assign_output;
1776 :     }
1777 :    
1778 : olson 1.22 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3