[Bio] / FigKernelPackages / P2P.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/P2P.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.21 - (view) (download) (as text)

1 : olson 1.1 #
2 :     # This module contains the code for the P2P update protocol.
3 :     #
4 :     # Package P2P contains the namespace declarations, and possibly toplevel utility
5 :     # routines. (get_relay ?)
6 :     #
7 :     # Package P2P::Relay contains methods for contacting the P2P relay service. The actual
8 :     # implementation of the relay service is not contained here - it is a standalone module
9 :     # that can be installed on a web server that does not have a full SEED installed.
10 :     #
11 :     # Package P2P::Requestor contains the requestor-side code for the update protocol.
12 :     #
13 :     # Package P2P::Service contains the web service implementation routines for the
14 :     # protocol.
15 :     #
16 :    
17 :     package P2P;
18 :    
19 :     use FIG_Config;
20 :    
21 : olson 1.19 use DB_File;
22 :     use Fcntl;
23 :    
24 : olson 1.1 use strict;
25 :     use Exporter;
26 :     use base qw(Exporter);
27 :    
28 : olson 1.15 use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
29 :    
30 : olson 1.1 use Data::Dumper;
31 :    
32 :     use vars qw(@EXPORT @EXPORT_OK);
33 :     @EXPORT = ();
34 :     @EXPORT_OK = qw($ns_p2p $ns_relay);
35 :    
36 :     our $ns_p2p = "http://thefig.info/schemas/p2p_update";
37 :     our $ns_relay = "http://thefig.info/schemas/p2p_relay";
38 :    
39 : olson 1.21 my $peg_batch_size = 1000;
40 :     my $anno_batch_size = 1000;
41 :     my $assign_batch_size = 1000;
42 :    
43 : olson 1.1 =pod
44 :    
45 :     =head1 perform_update($peer)
46 :    
47 :     Perform a peer-to-peer update with the given peer. $peer is an instance of
48 :     P2P::Requestor which can connect to the peer. It is expected that the
49 :     SEED infrastructure will create this requestor appropriately for the
50 :     particular circumstance (direct connection, thru relay, etc).
51 :    
52 :     This code executes the high-level protocol, maintaining state between
53 :     calls to the peer to exchange the actual information.
54 :    
55 :     =cut
56 :    
57 :     sub perform_update
58 :     {
59 : olson 1.20 my($fig, $peer, $last_update, $skip_tough_search, $update_thru) = @_;
60 : olson 1.1
61 : olson 1.20 my $ret = $peer->request_update($last_update, $update_thru);
62 : olson 1.1
63 :     if (!$ret or ref($ret) ne "ARRAY")
64 :     {
65 : olson 1.18 die "perform_update: request_update failed\n";
66 : olson 1.1 }
67 :    
68 : olson 1.15 my($session, $target_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,
69 : olson 1.1 $target_time, $compatible) = @$ret;
70 :    
71 : olson 1.18 print "perform_update: session=$session target=@$target_release num_annos=$num_annos\n";
72 : olson 1.1 print " num_pegs=$num_pegs num_genomes=$num_genomes target_time=$target_time compat=$compatible\n";
73 :    
74 :     #
75 : olson 1.19 # We now know the data release for our peer.
76 :     #
77 :     # Open up the peg translation cache database (a DB_File) tied
78 :     # to %peg_cache. We needn't worry about keeping it in a directory
79 :     # based on our current release, as it the cache directory is kept *in*
80 :     # the current data release directory.
81 :     #
82 :    
83 :     my $cache_handle;
84 :     my %peg_cache;
85 :     if ($target_release->[1] ne "")
86 :     {
87 :     my $cache_file = "pegcache.$target_release->[1].db";
88 :     my $cache_dir = "$FIG_Config::data/P2PQueue";
89 :     $fig->verify_dir($cache_dir);
90 :    
91 :     $cache_handle = tie(%peg_cache, "DB_File", "$cache_dir/$cache_file",
92 :     O_CREAT | O_RDWR, 0666, $DB_HASH);
93 :     $cache_handle or warn "Could not tie peg_cache to $cache_dir/$cache_file: $!\n";
94 :     }
95 :    
96 :     #
97 : olson 1.21 # peg_mapping is the local mapping from remote->local peg. This might
98 :     # be replacable by peg_cache from above.
99 : olson 1.1 #
100 : olson 1.21 my %peg_mapping;
101 : olson 1.1
102 : olson 1.21
103 : olson 1.1 #
104 : olson 1.21 # We have the information now to begin the update process. Retrieve the pegs.
105 : olson 1.1 #
106 :    
107 : olson 1.21 _compute_peg_mapping($fig, $peer, $session, $num_pegs, \%peg_mapping, \%peg_cache, $cache_handle,
108 :     $skip_tough_search);
109 : olson 1.7
110 : olson 1.19 $cache_handle->sync();
111 :     untie %peg_cache;
112 :    
113 :     #
114 : olson 1.15 # Create a list of locally-mapped annotations on a per-genome
115 :     # basis.
116 :     #
117 :    
118 :     my %genome_annos;
119 : olson 1.19
120 :     #
121 :     # %genome_assignments is a hash mapping from genome to a hashref
122 :     # that maps peg to function (since assignments are unique).
123 :     #
124 :     # (Hm. Unless two remote pegs map to the same local peg; unclear what to do
125 :     # then. Punt for now).
126 :     #
127 :     my %genome_assignments;
128 : olson 1.15
129 : olson 1.21
130 :    
131 :     #
132 :     # Retrieve the annotations, and generate a list of mapped annotations.
133 :     #
134 :    
135 :     for (my $anno_start = 0; $anno_start < $num_annos; $anno_start += $anno_batch_size)
136 : olson 1.15 {
137 : olson 1.21 my $anno_req_len = $num_annos - $anno_start;
138 :     $anno_req_len = $anno_batch_size if $anno_req_len > $anno_batch_size;
139 : olson 1.15
140 : olson 1.21 print "Retrieve $anno_req_len annos at $anno_start\n";
141 :    
142 :     my $annos = $peer->get_annotations($session, $anno_start, $anno_req_len);
143 : olson 1.15
144 : olson 1.21 for my $anno (@$annos)
145 :     {
146 :     my($his_id, $ts, $author, $anno) = @$anno;
147 :    
148 :     my $my_id = $peg_mapping{$his_id};
149 :     next unless $my_id;
150 : olson 1.15
151 : olson 1.21 my $genome = $fig->genome_of($my_id);
152 :    
153 :     push(@{$genome_annos{$genome}}, [$my_id, $ts, $author, $anno]);
154 :     }
155 : olson 1.15 }
156 :    
157 : olson 1.19 #
158 :     # Do the same for the assignments
159 :     #
160 :    
161 : olson 1.20 # print Dumper($assignments);
162 :    
163 : olson 1.21
164 :     for (my $assign_start = 0; $assign_start < $num_assignments; $assign_start += $assign_batch_size)
165 : olson 1.19 {
166 : olson 1.21 my $assign_req_len = $num_assignments - $assign_start;
167 :     $assign_req_len = $assign_batch_size if $assign_req_len > $assign_batch_size;
168 : olson 1.19
169 : olson 1.21 print "Retrieve $assign_req_len assigns at $assign_start\n";
170 :    
171 :     my $assignments = $peer->get_assignments($session, $assign_start, $assign_req_len);
172 :    
173 :     for my $assign (@$assignments)
174 :     {
175 :     my($his_id, $ts, $author, $func) = @$assign;
176 : olson 1.19
177 : olson 1.21 my $my_id = $peg_mapping{$his_id};
178 :     next unless $my_id;
179 : olson 1.19
180 : olson 1.21 my $genome = $fig->genome_of($my_id);
181 : olson 1.19
182 : olson 1.21 $genome_assignments{$genome}->{$my_id} = [$my_id, $ts, $author, $func];
183 :     }
184 : olson 1.19 }
185 :    
186 :     # print Dumper(\%genome_annos);
187 : olson 1.15
188 :     #
189 :     # Now install annotations.
190 :     #
191 :    
192 : olson 1.20 open(my $old_assignments, ">old_assignments");
193 :    
194 : olson 1.15 for my $genome (keys(%genome_annos))
195 :     {
196 : olson 1.19 #
197 :     # Plan: Apply the merge_annotations.pl logic. Read the annotations
198 :     # from the per-org annotations file, add the new ones here, sort, and remove duplicates.
199 :     # Write the results to the annotations file.
200 :     #
201 :     # When we are all done, rerun the index_annotations script.
202 :     #
203 :     # Why not do that incrementally? Partly because the annotation_seeks table doesn't
204 :     # have a column for the genome id, so a removal of old data would require a
205 :     # string-match query; since a complete reindex of the annotations is pretty
206 :     # fast (60 sec on a G4 laptop on a firewire disk), it's not clear whether the incremental
207 :     # update would actually be a win.
208 :     #
209 :    
210 :     my @annos = @{$genome_annos{$genome}};
211 :     my $assignments = $genome_assignments{$genome};
212 :     #
213 :     # %assignment_annos is a hash from peg to the list
214 :     # of annotations for that peg.
215 :     #
216 :     my %assignment_annos;
217 :    
218 :     my $dir = "$FIG_Config::organisms/$genome";
219 :     my $anno_file = "$dir/annotations";
220 :     my $anno_bak = "$dir/annotations." . time;
221 :    
222 :     my $new_count = @annos;
223 :    
224 :     #
225 :     # Rename the annotations file to a new name based on the current time.
226 :     #
227 :    
228 :     if (-f $anno_file)
229 :     {
230 :     rename($anno_file, $anno_bak) or die "Cannot rename $anno_file to $anno_bak: $!";
231 :     }
232 :    
233 :     if (open(my $fh, "<$anno_bak"))
234 :     {
235 :     #
236 :     # While we are scanning here, we look for the latest local assignment
237 :     # for any peg for which we are installing an assignment.
238 :     #
239 :     local($/) = "\n//\n";
240 :    
241 :     my($chunk, $peg, $ts, $author, $anno);
242 :    
243 :     while (defined($chunk = <$fh>))
244 :     {
245 :     chomp $chunk;
246 :     ($peg, $ts, $author, $anno) = split(/\n/, $chunk, 4);
247 :    
248 :     if ($peg =~ /^fig\|/ and $ts =~ /^\d+$/)
249 :     {
250 :     my $ent = [$peg, $ts, $author, $anno];
251 :     push(@annos, $ent);
252 :    
253 :     if (defined($assignments->{$peg}))
254 :     {
255 :     #
256 :     # We have an incoming assignment for this peg.
257 :     # Don't parse anything yet, but push the annotation
258 :     # on a list so we can sort by date.
259 :     #
260 :     push(@{$assignment_annos{$peg}}, $ent);
261 :     }
262 :     }
263 :     }
264 :     close($fh);
265 :     }
266 :    
267 :     #
268 :     # Determine if we are going to install an assignment.
269 :     #
270 :    
271 :     for my $peg (keys %$assignments)
272 :     {
273 : olson 1.20 my(undef, $ts, $author, $func) = @{$assignments->{$peg}};
274 :    
275 :     #
276 :     # Sort the existing annotations for this peg by date.
277 :     #
278 :     # Recall that this list has entries [$peg, $timestamp, $author, $anno]
279 :     #
280 :    
281 :     my @eannos;
282 :     if (ref($assignment_annos{$peg}))
283 :     {
284 :     @eannos = sort { $b->[1] <=> $a->[1] } @{$assignment_annos{$peg}};
285 :     }
286 :     else
287 :     {
288 :     #
289 :     # No assignment annotations found.
290 :     #
291 :     @eannos = ();
292 :     }
293 :    
294 :     # print "Assignment annos for $peg: ", Dumper(\@eannos);
295 : olson 1.19
296 :     #
297 : olson 1.20 # Filter out just the master assignments that are newer than
298 :     # the one we are contemplating putting in place.
299 : olson 1.19 #
300 :    
301 : olson 1.20 my @cand = grep {
302 :     ($_->[1] > $ts) and ($_->[3] =~ /Set master function to/)
303 :     } @eannos;
304 :    
305 :     if (@cand > 0)
306 :     {
307 :     #
308 :     # Here is were some policy needs to be put in place --
309 :     # we have a more recent annotation on the current system.
310 :     #
311 :     # For now, we will not install an assignment if there is any
312 :     # newer assignment in place.
313 :     #
314 :    
315 :     warn "Skipping assignment for $peg $func due to more recent assignment $cand[0]->[3]\n";
316 :     }
317 :     else
318 :     {
319 :     #
320 :     # Nothing is blocking us. While we are testing, just slam this assignment in.
321 :     #
322 : olson 1.19
323 : olson 1.20 my $old = $fig->function_of($peg, 'master');
324 :     print $old_assignments "$peg\t$old\n";
325 :    
326 : olson 1.21 if ($old ne $func)
327 :     {
328 :     print "Assign $peg $func\n";
329 :     $fig->assign_function($peg, 'master', $func);
330 :     }
331 : olson 1.20 }
332 : olson 1.19 }
333 :    
334 :     open(my $outfh, ">$anno_file") or die "Cannot open new annotation file $anno_file: $!\n";
335 :    
336 :     my $last;
337 :     my @sorted = sort { ($a->[0] cmp $b->[0]) or ($a->[1] <=> $b->[1]) } @annos;
338 :     my $inst = 0;
339 :     my $dup = 0;
340 :     foreach my $ann (@sorted)
341 :     {
342 :     my $txt = join("\n", @$ann);
343 :     #
344 :     # Drop the trailing \n if there is one; we will add it back when we print and
345 :     # want to ensure the file format remains sane.
346 :     #
347 :     chomp $txt;
348 :     if ($txt ne $last)
349 :     {
350 :     print $outfh "$txt\n//\n";
351 :     $last = $txt;
352 : olson 1.21 # print "Inst $ann->[0] $ann->[1] $ann->[2]\n";
353 : olson 1.19 $inst++;
354 :     }
355 :     else
356 :     {
357 : olson 1.21 # print "Dup $ann->[0] $ann->[1] $ann->[2]\n";
358 : olson 1.19 $dup++;
359 :     }
360 :     }
361 :     close($outfh);
362 :     chmod(0666, $anno_file) or warn "Cannot chmod 0666 $anno_file: $!\n";
363 :     print "Wrote $anno_file. $new_count new annos, $inst installed, $dup duplicates\n";
364 :    
365 :     #
366 : olson 1.15 # _install_genome_annos($fig, $genome, $genome_annos{$genome});
367 :     }
368 : olson 1.20 close($old_assignments);
369 : olson 1.1 }
370 :    
371 : olson 1.21 #
372 :     # Compute the peg mapping for a session.
373 :     #
374 :     # $fig Active FIG instance
375 :     # $peer P2P peer for this session.
376 :     # $session P2P session ID
377 :     # $peg_mapping Hash ref for the remote -> local PEG mapping
378 :     # $peg_cache Hash ref for the persistent remote -> local PEG mapping cache db.
379 :     # $cache_handle DB_File handle corresponding to $peg_cache.
380 :     #
381 :     sub _compute_peg_mapping
382 :     {
383 :     my($fig, $peer, $session, $num_pegs, $peg_mapping, $peg_cache, $cache_handle, $skip_tough_search) = @_;
384 :    
385 :     #
386 :     # genome_map is a hash mapping from target genome id to a list of
387 :     # pegs on the target. This is used to construct a finalize_pegs request after
388 :     # the first phase of peg mapping.
389 :     #
390 :    
391 :     my %genome_map;
392 :    
393 :     #
394 :     # target_genome_info is a hash mapping from target genome
395 :     # identifier to the target-side information on the genome -
396 :     # number of contigs, number of nucleotides, checksum.
397 :     #
398 :     # We accumulate it here across possibly multiple batches of
399 :     # peg retrievals in order to create a single finalization
400 :     # list.
401 :     #
402 :    
403 :     my %target_genome_info;
404 :    
405 :     #
406 :     # For very large transfers, we need to batch the peg processing.
407 :     #
408 :    
409 :     for (my $peg_start = 0; $peg_start < $num_pegs; $peg_start += $peg_batch_size)
410 :     {
411 :     my $peg_req_len = $num_pegs - $peg_start;
412 :     $peg_req_len = $peg_batch_size if $peg_req_len > $peg_batch_size;
413 :    
414 :     print "Getting $peg_req_len pegs at $peg_start\n";
415 :     my $ret = $peer->get_pegs($session, $peg_start, $peg_req_len);
416 :    
417 :     if (!$ret or ref($ret) ne "ARRAY")
418 :     {
419 :     die "perform_update: get_pegs failed\n";
420 :     }
421 :    
422 :     my($peg_list, $genome_list) = @$ret;
423 :    
424 :     for my $gent (@$genome_list)
425 :     {
426 :     $target_genome_info{$gent->[0]} = $gent;
427 :     }
428 :    
429 :     _compute_peg_mapping_batch($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
430 :     $peg_list, \%genome_map);
431 :     }
432 :    
433 :     #
434 :     # We have finished first pass. Now go over the per-genome mappings that need to be made.
435 :     #
436 :     # $genome_map{$genome_id} is a list of pegs that reside on that genome.
437 :     # The pegs and genome id are both target-based identifiers.
438 :     #
439 :     # %target_genome_info defines the list of genome information we have on the remote
440 :     # side.
441 :     #
442 :     # We build a request to be passed to finalize_pegs. Each entry in the request is either
443 :     # ['peg_genome', $peg] which means that we have a genome that corresponds to the
444 :     # genome the peg is in. We can attempt to map via contig locations.
445 :     #
446 :     # If that is not the case, we pass a request entry of ['peg_unknown', $peg]
447 :     # which will result in the sequence data being returned.
448 :     #
449 :    
450 :     my @finalize_req = ();
451 :    
452 :     #
453 :     # local_genome maps a target peg identifier to the local genome id it translates to.
454 :     #
455 :     my %local_genome;
456 :    
457 :     for my $genome (keys(%target_genome_info))
458 :     {
459 :     my($tg, $n_contigs, $n_nucs, $cksum) = @{$target_genome_info{$genome}};
460 :    
461 :     $tg eq $genome or die "Invalid entry in target_genome_info for $genome => $tg, $n_contigs, $n_nucs, $cksum";
462 :    
463 :     #
464 :     # Don't bother unless we have any pegs to look up.
465 :     #
466 :     next unless defined($genome_map{$genome});
467 :    
468 :     #
469 :     # Determine if we have a local genome installed that matches precisely the
470 :     # genome on the target side.
471 :     #
472 :     my $my_genome = $fig->find_genome_by_content($genome, $n_contigs, $n_nucs, $cksum);
473 :    
474 :     my $pegs = $genome_map{$genome};
475 :    
476 :     if ($my_genome)
477 :     {
478 :     #
479 :     # We do have such a local genome. Generate a peg_genome request to
480 :     # get the location information from the target side.
481 :     #
482 :     # Also remember the local genome mapping for this peg.
483 :     #
484 :    
485 :     print "$genome mapped to $my_genome\n";
486 :     for my $peg (@$pegs)
487 :     {
488 :     push(@finalize_req, ['peg_genome', $peg]);
489 :     $local_genome{$peg} = $my_genome;
490 :     }
491 :    
492 :     }
493 :     else
494 :     {
495 :     #
496 :     # We don't have such a genome. We need to retrieve the
497 :     # sequence data in order to finish mapping.
498 :     #
499 :     push(@finalize_req, map { ['peg_unknown', $_] } @$pegs);
500 :     }
501 :     }
502 :    
503 :     #
504 :     # We've built our finalization request. Handle it (possibly with batching here too).
505 :     #
506 :    
507 :     _process_finalization_request($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
508 :     \%local_genome, \@finalize_req, $skip_tough_search);
509 :    
510 :     }
511 :    
512 :     #
513 :     # Process one batch of PEGs.
514 :     #
515 :     # Same args as _compute_peg_mapping, with the addition of:
516 :     #
517 :     # $peg_list List of pegs to be processed
518 :     # $genome_map Hash maintaining list of genomes with their pegs.
519 :     # $target_genome_info Hash maintaining overall list of target-side genome information.
520 :     #
521 :     sub _compute_peg_mapping_batch
522 :     {
523 :     my($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
524 :     $peg_list, $genome_map, $target_genome_info) = @_;
525 :    
526 :     #
527 :     # Walk the list of pegs as returned from get_pegs() and determine what has to
528 :     # be done.
529 :     #
530 :     # If the entry is ['peg', $peg], we can use the peg ID as is.
531 :     #
532 :     # If the entry is ['peg_info', $peg, $alias_list, $genome], the peg
533 :     # has the given aliases, and is in the given genome.
534 :     #
535 :     for my $peg_info (@$peg_list)
536 :     {
537 :     my($key, $peg, @rest) = @$peg_info;
538 :    
539 :     if ($key eq 'peg')
540 :     {
541 :     #
542 :     # Peg id is directly usable.
543 :     #
544 :     $peg_mapping->{$peg} = $peg;
545 :     }
546 :     elsif ($key eq 'peg_info')
547 :     {
548 :     #
549 :     # Peg id not directly usable. See if we have it in the cache.
550 :     #
551 :    
552 :     if ((my $cached = $peg_cache->{$peg}) ne "")
553 :     {
554 :     #
555 :     # Cool, we've cached the result. Use it.
556 :     #
557 :    
558 :     $peg_mapping->{$peg} = $cached;
559 :     # warn "Found cached mapping $peg => $cached\n";
560 :     next;
561 :     }
562 :    
563 :     #
564 :     # It is not cached. Attempt to resolve by means of alias IDs.
565 :     #
566 :    
567 :     my($alias_list, $genome_id) = @rest;
568 :    
569 :     for my $alias (@$alias_list)
570 :     {
571 :     my $mapped = $fig->by_alias($alias);
572 :     if ($mapped)
573 :     {
574 :     print "$peg maps to $mapped via $alias\n";
575 :     $peg_mapping->{$peg}= $mapped;
576 :     $peg_cache->{$peg} = $mapped;
577 :     last;
578 :     }
579 :     }
580 :    
581 :     #
582 :     # If we weren't able to resolve by ID,
583 :     # add to %genome_map as a PEG that will need
584 :     # to be resolved by means of contig location.
585 :     #
586 :    
587 :     if (!defined($peg_mapping->{$peg}))
588 :     {
589 :     push(@{$genome_map->{$genome_id}}, $peg);
590 :     print "$peg did not map on first pass\n";
591 :     }
592 :     }
593 :     }
594 :    
595 :     #
596 :     # Flush the cache to write out any computed mappings.
597 :     #
598 :     $cache_handle->sync();
599 :    
600 :     }
601 :    
602 :     sub _process_finalization_request
603 :     {
604 :     my($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
605 :     $local_genome, $finalize_req, $skip_tough_search) = @_;
606 :    
607 :     #
608 :     # Immediately return unless there's something to do.
609 :     #
610 :     return unless ref($finalize_req) and @$finalize_req > 0;
611 : olson 1.1
612 : olson 1.21 my $fin_batch_size = 50;
613 :    
614 :     while (@$finalize_req > 0)
615 :     {
616 :     my @req = splice(@$finalize_req, 0, $fin_batch_size);
617 :    
618 :     print "Invoking finalize_pegs on ", int(@req), " pegs\n";
619 :     my $ret = $peer->finalize_pegs($session, \@req);
620 :    
621 :     if (!$ret or ref($ret) ne "ARRAY")
622 :     {
623 :     die "perform_update: finalize_pegs failed\n";
624 :     }
625 :    
626 :     #
627 :     # The return is a list of either location entries or
628 :     # sequence data. Attempt to finish up the mapping.
629 :     #
630 :    
631 :     my(%sought, %sought_seq);
632 :    
633 :    
634 :     my $dbh = $fig->db_handle();
635 :     for my $entry (@$ret)
636 :     {
637 :     my($what, $peg, @rest) = @$entry;
638 :    
639 :     if ($what eq "peg_loc")
640 :     {
641 :     my($strand, $start, $end, $cksum, $seq) = @rest;
642 :    
643 :     #
644 :     # We have a contig location. Try to find a matching contig
645 :     # here, and see if it maps to something.
646 :     #
647 :    
648 :     my $my_genome = $local_genome->{$peg};
649 :     my $local_contig = $fig->find_contig_with_checksum($my_genome, $cksum);
650 :     if ($local_contig)
651 :     {
652 :     #
653 :     # Now look up the local peg. We match on the end location; depending on the strand
654 :     # the feature is on, we want to look at either minloc or maxloc.
655 :     #
656 :    
657 :     my $whichloc = $strand eq '-' ? "minloc" : "maxloc";
658 :    
659 :     my $res = $dbh->SQL(qq!SELECT id from features
660 :     WHERE $whichloc = $end and genome = '$my_genome' and
661 :     contig = '$local_contig'
662 :     !);
663 :    
664 :     if ($res and @$res > 0)
665 :     {
666 :     my(@ids) = map { $_->[0] } @$res;
667 :     my $id = $ids[0];
668 :     $peg_mapping->{$peg} = $id;
669 :     $peg_cache->{$peg} = $id;
670 :     print "Mapped $peg to $id via contigs\n";
671 :     if (@$res > 1)
672 :     {
673 :     warn "Multiple mappings found for $peg: @ids\n";
674 :     }
675 :     }
676 :     else
677 :     {
678 :     print "failed: $peg $my_genome and contig $local_contig start=$start end=$end strand=$strand\n";
679 :     $sought{$peg}++;
680 :     $sought_seq{$peg} = $seq;
681 :     }
682 :     }
683 :     else
684 :     {
685 :     print "Mapping failed for $my_genome checksum $cksum\n";
686 :     $sought{$peg}++;
687 :     $sought_seq{$peg} = $seq;
688 :     }
689 :     }
690 :     elsif ($what eq "peg_seq")
691 :     {
692 :     my($seq) = @rest;
693 :    
694 :     $sought{$peg}++;
695 :     $sought_seq{$peg} = $seq;
696 :     }
697 :     }
698 :    
699 :     #
700 :     # Now see if we need to do a tough search.
701 :     #
702 :    
703 :     if (keys(%sought) > 0 and !$skip_tough_search)
704 :     {
705 :     my %trans;
706 :    
707 :     print "Starting tough search\n";
708 :    
709 :     $fig->tough_search(undef, \%sought_seq, \%trans, \%sought);
710 :     print "Tough search translated: \n";
711 :     while (my($tpeg, $ttrans) = each(%trans))
712 :     {
713 :     print " $tpeg -> $ttrans\n";
714 :     $peg_mapping->{$tpeg} = $ttrans;
715 :     $peg_cache->{$tpeg} = $ttrans;
716 :     }
717 :     }
718 :     }
719 :     }
720 : olson 1.15
721 : olson 1.1 #############
722 :     #
723 :     # P2P Relay
724 :     #
725 :     #############
726 :    
727 :    
728 :     package P2P::Relay;
729 :     use strict;
730 :    
731 :     use Data::Dumper;
732 :     use SOAP::Lite;
733 :    
734 :     use P2P;
735 :    
736 :     sub new
737 :     {
738 :     my($class, $url) = @_;
739 :    
740 :     my $proxy = SOAP::Lite->uri($P2P::ns_relay)->proxy($url);
741 :    
742 :     my $self = {
743 :     url => $url,
744 :     proxy => $proxy,
745 :     };
746 :     return bless($self, $class);
747 :     }
748 :    
749 :     sub enumerate_annotation_systems
750 :     {
751 :     my($self) = @_;
752 :    
753 :     return $self->{proxy}->enumerate_annotation_systems()->result;
754 :     }
755 :    
756 :     sub fetch_queries
757 :     {
758 :     my($self, $id) = @_;
759 :    
760 :     my $reply = $self->{proxy}->fetch_queries($id);
761 :    
762 :     if ($reply->fault)
763 :     {
764 :     print "Failed to fetch queries: ", $reply->faultcode, " ", $reply->faultstring, "\n";
765 :     return undef;
766 :     }
767 :    
768 :     return $reply->result;
769 :     }
770 :    
771 :     sub deposit_answer
772 :     {
773 :     my($self, $id, $key, $answer) = @_;
774 :    
775 :     my $reply = $self->{proxy}->deposit_answer($id, $key,
776 :     SOAP::Data->type('base64')->value($answer));
777 :    
778 :     if ($reply->fault)
779 :     {
780 :     print "deposit_answer got fault: ", $reply->faultcode, " ", $reply->faultstring, "\n";
781 :     return undef;
782 :     }
783 :    
784 :     return $reply;
785 :     }
786 :    
787 :     =pod
788 :    
789 :     =head1 await_result
790 :    
791 :     Await the result from a possibly-asynchronous soap request.
792 :    
793 :     Look at the reply that we have. If it's a deferred reply, loop polling
794 :     the relay for the actual result.
795 :    
796 :     We determine if the reply is a deferred reply by examining the namespace
797 :     URI of the response. A response will be generated from the relay's namespace,
798 :     rather than that of the application itself.
799 :    
800 :     =cut
801 :    
802 :     sub await_result
803 :     {
804 :     my($self, $reply) = @_;
805 :    
806 :     while (1)
807 :     {
808 :     #
809 :     # Retrieve the namespace of the response, which is the first
810 :     # element in the body of the message.
811 :     #
812 :     my $ns = $reply->namespaceuriof('/Envelope/Body/[1]');
813 : olson 1.20 # print "Reply ns=$ns want $P2P::ns_relay\n";
814 : olson 1.1
815 :     if ($ns eq $P2P::ns_relay)
816 :     {
817 :     my $val = $reply->result;
818 : olson 1.20 # print "got val=", Dumper($val);
819 : olson 1.1 if ($val->[0] eq 'deferred')
820 :     {
821 :     #
822 :     # Sleep a little, then try to retrieve the response.
823 :     #
824 :    
825 :     sleep(1);
826 :     my $id = $val->[1];
827 :    
828 :     print "Retrieving reply\n";
829 :     $reply = $self->{proxy}->call_completed($id);
830 :     }
831 :     else
832 :     {
833 :     #
834 :     # We're not sure what to do here..
835 :     #
836 :     return undef;
837 :     }
838 :     }
839 :     else
840 :     {
841 :     #
842 :     # We got an actual response. Return it.
843 :     #
844 :    
845 :     return $reply;
846 :     }
847 :     }
848 :     }
849 :    
850 :     #############
851 :     #
852 :     # P2P Requestor
853 :     #
854 :     #############
855 :    
856 :     package P2P::Requestor;
857 :     use strict;
858 :    
859 :     use Data::Dumper;
860 : olson 1.15 use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
861 : olson 1.1
862 :     use SOAP::Lite;
863 : olson 1.15
864 :     #use SOAP::Lite +trace => [qw(transport dispatch result debug)];
865 : olson 1.1 use P2P;
866 :    
867 :     #
868 :     # Create a new Requestor. It contains a reference to the FIG instance
869 :     # so that we can run the protocol completely from in here.
870 :     #
871 :    
872 :     sub new
873 :     {
874 :     my($class, $fig, $url, $peer_id, $relay) = @_;
875 :    
876 : olson 1.17 my $proxy = SOAP::Lite->uri($ns_p2p)->proxy($url, timeout => 3600);
877 : olson 1.1
878 :     my $self = {
879 :     fig => $fig,
880 :     url => $url,
881 :     peer_id => $peer_id,
882 :     proxy => $proxy,
883 :     relay => $relay,
884 :     };
885 :     return bless($self, $class);
886 :     }
887 :    
888 :     #
889 :     # First step: Request an update.
890 :     #
891 :     # We need to determine some notion of what our release is, since we are not
892 :     # currently tagging them explicitly. Until we delve into this more,
893 :     # I am going to return a null release, which means the same-release
894 :     # optimization won't be able to kick in.
895 :     #
896 :     # We also need to determine the last time we got an update from this
897 :     # system.
898 :     #
899 :    
900 :     sub request_update
901 :     {
902 : olson 1.20 my($self, $last_update, $update_thru) = @_;
903 : olson 1.1
904 : olson 1.18 my $rel = [$self->{fig}->get_release_info()];
905 : olson 1.1
906 :     if (!defined($last_update))
907 :     {
908 :     $last_update = $self->{fig}->get_peer_last_update($self->{peer_id});
909 :     }
910 : olson 1.17
911 :     print "Requesting update via $self->{proxy}\n";
912 : olson 1.20 my $reply = $self->{proxy}->request_update($rel, $last_update, $update_thru);
913 :     # print "Got reply ", Dumper($reply);
914 : olson 1.1
915 :     if ($self->{relay})
916 :     {
917 :     $reply = $self->{relay}->await_result($reply);
918 :     }
919 :    
920 :     if ($reply->fault)
921 :     {
922 :     print "request_update triggered fault: ", $reply->faultcode, " ", $reply->faultstring, "\n";
923 :     return undef;
924 :     }
925 :    
926 :     return $reply->result;
927 :     }
928 :    
929 :     =pod
930 :    
931 :     =head1 get_pegs($session_id, $start, $length)
932 :    
933 :    
934 :     =cut
935 :    
936 :     sub get_pegs
937 :     {
938 :     my($self, $session_id, $start, $length) = @_;
939 :    
940 :     return $self->call("get_pegs", $session_id, $start, $length);
941 :     }
942 :    
943 : olson 1.6 sub finalize_pegs
944 :     {
945 :     my($self, $session_id, $request) = @_;
946 :    
947 :     return $self->call("finalize_pegs", $session_id, $request);
948 :     }
949 :    
950 : olson 1.15 sub get_annotations
951 :     {
952 :     my($self, $session_id, $start, $length) = @_;
953 :    
954 :     return $self->call("get_annotations", $session_id, $start, $length);
955 :     }
956 :    
957 : olson 1.19 sub get_assignments
958 :     {
959 :     my($self, $session_id, $start, $length) = @_;
960 :    
961 :     return $self->call("get_assignments", $session_id, $start, $length);
962 :     }
963 :    
964 : olson 1.1 sub call
965 :     {
966 :     my($self, $func, @args) = @_;
967 : olson 1.15
968 :     my $t0 = [gettimeofday()];
969 :     print "Calling $func\n";
970 : olson 1.1 my $reply = $self->{proxy}->$func(@args);
971 : olson 1.15 my $t1 = [gettimeofday()];
972 :    
973 :     my $elap = tv_interval($t0, $t1);
974 :     print "Call to $func took $elap\n";
975 : olson 1.1
976 :     if ($self->{relay})
977 :     {
978 :     $reply = $self->{relay}->await_result($reply);
979 :     }
980 :    
981 :     if ($reply->fault)
982 :     {
983 :     print "$func triggered fault: ", $reply->faultcode, " ", $reply->faultstring, "\n";
984 :     return undef;
985 :     }
986 :    
987 :     return $reply->result;
988 :     }
989 :    
990 :    
991 :     #############
992 :     #
993 :     # P2P Service
994 :     #
995 :     # Code in this module is invoked on the target on behalf of a requestor.
996 :     #
997 :     #############
998 :    
999 :     package P2P::Service;
1000 :    
1001 :     use Data::Dumper;
1002 :    
1003 :     use FIG;
1004 :     use FIG_Config;
1005 :     use strict;
1006 :    
1007 :     use File::Temp qw(tempdir);
1008 :     use File::Basename;
1009 :    
1010 :     sub request_update
1011 :     {
1012 : olson 1.20 my($class, $his_release, $last_update, $update_thru)= @_;
1013 : olson 1.1
1014 :     #
1015 :     # Verify input.
1016 :     #
1017 :    
1018 :     if ($last_update !~ /^\d+$/)
1019 :     {
1020 :     die "request_update: last_update must be a number (not '$last_update')\n";
1021 :     }
1022 :    
1023 : olson 1.20 if ($update_thru eq "")
1024 :     {
1025 :     $update_thru = time + 10000;
1026 :     }
1027 :    
1028 : olson 1.1 #
1029 :     # Create a new session id and a spool directory to use for storage
1030 :     # of information about it. This can go in the tempdir since it is
1031 :     # not persistent.
1032 :     #
1033 :    
1034 :     &FIG::verify_dir("$FIG_Config::temp/p2p_spool");
1035 :     #my $spool_dir = tempdir(DIR => "$FIG_Config::temp/p2p_spool");
1036 :    
1037 :     my $spool_dir = "$FIG_Config::temp/p2p_spool/test";
1038 :     &FIG::verify_dir($spool_dir);
1039 :    
1040 :     my $session_id = basename($spool_dir);
1041 :     my $now = time;
1042 :    
1043 :     #
1044 :     # Gather the list of pegs and annotations for the update.
1045 :     #
1046 :    
1047 :     my $fig = new FIG;
1048 :    
1049 :     my $all_genomes = [$fig->genomes];
1050 :    
1051 :     my %all_genomes = map { $_ => 1 } @$all_genomes;
1052 :    
1053 :     my %pegs;
1054 : olson 1.15
1055 :     #
1056 :     # We keep track of usernames that have been seen, so that
1057 :     # we can both update our local user database and
1058 :     # we can report them to our peer.
1059 :     #
1060 :    
1061 :     my %users;
1062 : olson 1.1
1063 :     my $num_annos = 0;
1064 :     my $num_genomes = 0;
1065 :     my $num_pegs = 0;
1066 : olson 1.15 my $num_assignments = 0;
1067 : olson 1.1
1068 :     my $anno_fh;
1069 :     open($anno_fh, ">$spool_dir/annos");
1070 :    
1071 :     my $peg_fh;
1072 :     open($peg_fh, ">$spool_dir/pegs");
1073 :    
1074 :     my $genome_fh;
1075 :     open($genome_fh, ">$spool_dir/genomes");
1076 :    
1077 : olson 1.15 my $assign_fh;
1078 :     open($assign_fh, ">$spool_dir/assignments");
1079 :    
1080 : olson 1.1 for my $genome (@$all_genomes)
1081 :     {
1082 :     my $num_annos_for_genome = 0;
1083 : olson 1.15 my %assignment;
1084 : olson 1.1
1085 :     my $genome_dir = "$FIG_Config::organisms/$genome";
1086 :     next unless -d $genome_dir;
1087 :    
1088 :     my $afh;
1089 :     if (open($afh, "$genome_dir/annotations"))
1090 :     {
1091 :     my($fid, $anno_time, $who, $anno_text);
1092 :     local($/);
1093 :     $/ = "//\n";
1094 :     while (my $ann = <$afh>)
1095 :     {
1096 :     chomp $ann;
1097 :    
1098 :     if ((($fid, $anno_time, $who, $anno_text) =
1099 :     ($ann =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\n(.*\S)/s)) and
1100 : olson 1.20 $anno_time > $last_update and
1101 :     $anno_time < $update_thru)
1102 : olson 1.1
1103 :     {
1104 :     #
1105 : olson 1.15 # Update users list.
1106 :     #
1107 :    
1108 :     $users{$who}++;
1109 :    
1110 :     #
1111 : olson 1.1 # Look up aliases if we haven't seen this fid before.
1112 :     #
1113 :    
1114 :     if (!defined($pegs{$fid}))
1115 :     {
1116 :     my @aliases = $fig->feature_aliases($fid);
1117 :    
1118 :     print $peg_fh join("\t", $fid, $genome, @aliases), "\n";
1119 :     $num_pegs++;
1120 :     }
1121 :    
1122 :     print $anno_fh "$ann//\n";
1123 :    
1124 :     $pegs{$fid}++;
1125 :    
1126 :     $num_annos_for_genome++;
1127 :     $num_annos++;
1128 : olson 1.15
1129 :     #
1130 :     # While we're here, see if this is an assignment. We check in the
1131 :     # %assignment hash, which is keyed on fid, to see if we already
1132 :     # saw an assignment for this fid. If we have, we keep this one only if
1133 :     # the assignment time on it is later than the one we saw already.
1134 :     #
1135 :     # We are only looking at master assignments for now. We will need
1136 :     # to return to this issue and reexamine it, but in order to move
1137 :     # forward I am only matching master assignments.
1138 :     #
1139 :    
1140 :     if ($anno_text =~ /Set master function to\n(\S[^\n]+\S)/)
1141 :     {
1142 :     my $func = $1;
1143 :    
1144 :     my $other = $assignment{$fid};
1145 :    
1146 :     #
1147 :     # If we haven't seen an assignment for this fid,
1148 :     # or if it the other assignment has a timestamp that
1149 :     # is earlier than this one, set the assignment.
1150 :     #
1151 :    
1152 :     if (!defined($other) or
1153 :     ($other->[1] < $anno_time))
1154 :     {
1155 :     $assignment{$fid} = [$fid, $anno_time, $who, $func];
1156 :     }
1157 :     }
1158 : olson 1.1 }
1159 :     }
1160 :     close($afh);
1161 : olson 1.15
1162 :     #
1163 :     # Write out the assignments that remain.
1164 :     #
1165 :    
1166 :     for my $fid (sort keys(%assignment))
1167 :     {
1168 :     print $assign_fh join("\t", @{$assignment{$fid}}), "\n";
1169 :     $num_assignments++;
1170 :     }
1171 : olson 1.1 }
1172 : olson 1.15
1173 : olson 1.1
1174 :     #
1175 :     # Determine genome information if we have annotations for this one.
1176 :     #
1177 :    
1178 :     if ($num_annos_for_genome > 0)
1179 :     {
1180 :     $num_genomes++;
1181 :     if (open(my $cfh, "<$genome_dir/COUNTS"))
1182 :     {
1183 :     if ($_ = <$cfh>)
1184 :     {
1185 :     chomp;
1186 :     my($cgenome, $n_contigs, $total_nucs, $cksum) = split(/\t/, $_);
1187 :     if ($cgenome ne $genome)
1188 :     {
1189 :     warn "Hm, $genome has a COUNTS file with genome=$cgenome that does not match\n";
1190 :     }
1191 :     else
1192 :     {
1193 :     print $genome_fh join("\t",
1194 :     $genome, $num_annos_for_genome, $n_contigs,
1195 :     $total_nucs, $cksum), "\n";
1196 :     }
1197 :     }
1198 :     }
1199 :     }
1200 :    
1201 :     }
1202 :     close($anno_fh);
1203 :     close($peg_fh);
1204 :     close($genome_fh);
1205 : olson 1.15 close($assign_fh);
1206 : olson 1.1
1207 :     print "Pegs: $num_pegs\n";
1208 :     print "Genomes: $num_genomes\n";
1209 :     print "Annos: $num_annos\n";
1210 :    
1211 :     #
1212 :     # Check compatibility.
1213 :     #
1214 :    
1215 : olson 1.18 my $my_release = [$fig->get_release_info()];
1216 :    
1217 :     #
1218 :     # Release id is $my_release->[1].
1219 :     #
1220 :    
1221 :     my $compatible;
1222 :     if ($my_release->[1] ne "" and $his_release->[1] ne "")
1223 :     {
1224 :     #
1225 :     # Both releases must be defined for them to be compatible.
1226 :     #
1227 :     # At some point we need to consider the derived-release issue.
1228 :     #
1229 :    
1230 :     $compatible = $my_release->[1] eq $his_release->[1];
1231 :     }
1232 :     else
1233 :     {
1234 :     $compatible = 0;
1235 :     }
1236 : olson 1.1
1237 :     open(my $fh, ">$spool_dir/INFO");
1238 :     print $fh "requestor_release\t$his_release\n";
1239 :     print $fh "last_update\t$last_update\n";
1240 : olson 1.20 print $fh "update_thru\t$update_thru\n";
1241 : olson 1.1 print $fh "cur_update\t$now\n";
1242 :     print $fh "target_release\t$my_release\n";
1243 :     print $fh "compatible\t$compatible\n";
1244 :     print $fh "num_pegs\t$num_pegs\n";
1245 :     print $fh "num_genomes\t$num_genomes\n";
1246 :     print $fh "num_annos\t$num_annos\n";
1247 : olson 1.15 print $fh "num_assignments\t$num_assignments\n";
1248 : olson 1.1 close($fh);
1249 :    
1250 : olson 1.15 #
1251 :     # Construct list of users, and pdate local user database.
1252 :     #
1253 :    
1254 :     my @users = keys(%users);
1255 : olson 1.17 # $fig->ensure_users(\@users);
1256 : olson 1.15
1257 :     return [$session_id, $my_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,
1258 : olson 1.16 $now, $compatible, \@users];
1259 : olson 1.1 }
1260 :    
1261 :    
1262 :     sub get_pegs
1263 :     {
1264 :     my($self, $session_id, $start, $len) = @_;
1265 :     my(%session_info);
1266 :    
1267 :     my $spool_dir = "$FIG_Config::temp/p2p_spool/$session_id";
1268 :    
1269 :     -d $spool_dir or die "Invalid session id $session_id";
1270 :    
1271 :     #
1272 :     # Read in the cached information for this session.
1273 :     #
1274 :    
1275 :     open(my $info_fh, "<$spool_dir/INFO") or die "Cannot open INFO file: $!";
1276 :     while (<$info_fh>)
1277 :     {
1278 :     chomp;
1279 :     my($var, $val) = split(/\t/, $_, 2);
1280 :     $session_info{$var} = $val;
1281 :     }
1282 :     close($info_fh);
1283 :    
1284 :     #
1285 :     # Sanity check start and length.
1286 :     #
1287 :    
1288 :     if ($start < 0 or $start >= $session_info{num_pegs})
1289 :     {
1290 :     die "Invalid start position $start";
1291 :     }
1292 :    
1293 :     if ($len < 0 or ($start + $len - 1) >= $session_info{num_pegs})
1294 :     {
1295 :     die "Invalid length $len";
1296 :     }
1297 :    
1298 :     #
1299 :     # Open file, spin to the starting line, then start reading.
1300 :     #
1301 :    
1302 :     open(my $peg_fh, "<$spool_dir/pegs") or die "Cannot open pegs file: $!";
1303 :    
1304 :     my $peg_output = [];
1305 :     my $genome_output = [];
1306 :    
1307 :     my $peg_num = 0;
1308 :     my $genomes_to_show = [];
1309 :     my %genomes_to_show;
1310 :    
1311 :     my($fid, $genome, @aliases);
1312 :    
1313 :     while (<$peg_fh>)
1314 :     {
1315 :     next if ($peg_num < $start);
1316 :    
1317 :     last if ($peg_num > ($start + $len));
1318 :    
1319 :     chomp;
1320 :    
1321 :     #
1322 :     # OK, this is a peg to process.
1323 :     # It's easy if we're compatible.
1324 :     #
1325 :    
1326 :     ($fid, $genome, @aliases) = split(/\t/, $_);
1327 :    
1328 :     if ($session_info{compatible})
1329 :     {
1330 :     push(@$peg_output, ['peg', $fid]);
1331 :     }
1332 :     else
1333 :     {
1334 :     if (!$genomes_to_show{$genome})
1335 :     {
1336 :     push(@$genomes_to_show, $genome);
1337 :     $genomes_to_show{$genome}++;
1338 :     }
1339 :     push(@$peg_output, ['peg_info', $fid, [@aliases], $genome]);
1340 :     }
1341 :     }
1342 :     continue
1343 :     {
1344 :     $peg_num++;
1345 :     }
1346 :    
1347 :     #
1348 :     # Read the genomes file, returning information about genomes referenced
1349 :     # in the pegs returned.
1350 :     #
1351 :    
1352 :     my $n_left = @$genomes_to_show;
1353 :    
1354 :     open(my $gfh, "<$spool_dir/genomes") or die "Cannot open genomes file: $!";
1355 :     while ($n_left > 0 and $_ = <$gfh>)
1356 :     {
1357 :     chomp;
1358 :    
1359 :     my($genome, $n_annos, $n_contigs, $n_nucs, $cksum) = split(/\t/);
1360 :    
1361 :     if ($genomes_to_show{$genome})
1362 :     {
1363 :     push(@$genome_output, [$genome, $n_contigs, $n_nucs, $cksum]);
1364 :     $n_left--;
1365 :     }
1366 :     }
1367 :     close($gfh);
1368 :    
1369 :     return [$peg_output, $genome_output];
1370 :     }
1371 : olson 1.6
1372 :     sub finalize_pegs
1373 :     {
1374 :     my($self, $session, $request) = @_;
1375 :     my($out);
1376 :    
1377 :     my $fig = new FIG;
1378 :    
1379 :     #
1380 :     # Walk the request handling appropriately. This is fairly easy, as it
1381 :     # is just a matter of pulling either sequence or location/contig data.
1382 :     #
1383 :    
1384 :     for my $item (@$request)
1385 :     {
1386 :     my($what, $peg) = @$item;
1387 :    
1388 :     if ($what eq "peg_genome")
1389 :     {
1390 :     #
1391 :     # Return the location and contig checksum for this peg.
1392 :     #
1393 : olson 1.13 # We also include the sequence in case the contig mapping doesn't work.
1394 :     #
1395 : olson 1.6
1396 :     my $loc = $fig->feature_location($peg);
1397 :     my $contig = $fig->contig_of($loc);
1398 : olson 1.7 my $cksum = $fig->contig_checksum($fig->genome_of($peg), $contig);
1399 : olson 1.13 my $seq = $fig->get_translation($peg);
1400 : olson 1.6
1401 :     push(@$out, ['peg_loc', $peg,
1402 : olson 1.13 $fig->strand_of($peg),
1403 : olson 1.6 $fig->beg_of($loc), $fig->end_of($loc),
1404 : olson 1.13 $cksum, $seq]);
1405 : olson 1.6
1406 :     }
1407 : olson 1.7 elsif ($what eq "peg_unknown")
1408 : olson 1.6 {
1409 :     my $seq = $fig->get_translation($peg);
1410 :     push(@$out, ['peg_seq', $peg, $seq]);
1411 :     }
1412 :     }
1413 :     return $out;
1414 :     }
1415 :    
1416 : olson 1.15
1417 :     sub get_annotations
1418 :     {
1419 :     my($self, $session_id, $start, $len) = @_;
1420 :    
1421 :     #
1422 :     # This is now easy; just run thru the saved annotations and return.
1423 :     #
1424 :    
1425 :     my(%session_info);
1426 :    
1427 :     my $spool_dir = "$FIG_Config::temp/p2p_spool/$session_id";
1428 :    
1429 :     -d $spool_dir or die "Invalid session id $session_id";
1430 :    
1431 :     #
1432 :     # Read in the cached information for this session.
1433 :     #
1434 :    
1435 :     open(my $info_fh, "<$spool_dir/INFO") or die "Cannot open INFO file: $!";
1436 :     while (<$info_fh>)
1437 :     {
1438 :     chomp;
1439 :     my($var, $val) = split(/\t/, $_, 2);
1440 :     $session_info{$var} = $val;
1441 :     }
1442 :     close($info_fh);
1443 :    
1444 :     #
1445 :     # Sanity check start and length.
1446 :     #
1447 :    
1448 :     if ($start < 0 or $start >= $session_info{num_annos})
1449 :     {
1450 :     die "Invalid start position $start";
1451 :     }
1452 :    
1453 :     if ($len < 0 or ($start + $len - 1) >= $session_info{num_annos})
1454 :     {
1455 :     die "Invalid length $len";
1456 :     }
1457 :    
1458 :     #
1459 :     # Open file, spin to the starting line, then start reading.
1460 :     #
1461 :    
1462 :     open(my $anno_fh, "<$spool_dir/annos") or die "Cannot open annos file: $!";
1463 :    
1464 :     my $anno_output = [];
1465 :    
1466 :     my $anno_num = 0;
1467 :    
1468 :     local $/ = "//\n";
1469 :     while (<$anno_fh>)
1470 :     {
1471 :     next if ($anno_num < $start);
1472 :    
1473 :     last if ($anno_num > ($start + $len));
1474 :    
1475 :     chomp;
1476 :    
1477 :     my($id, $date, $author, $anno) = split(/\n/, $_, 4);
1478 :    
1479 :     push(@$anno_output, [$id, $date, $author, $anno]);
1480 :     }
1481 :     continue
1482 :     {
1483 :     $anno_num++;
1484 :     }
1485 :    
1486 :     return $anno_output;
1487 :     }
1488 : olson 1.19
1489 :     sub get_assignments
1490 :     {
1491 :     my($self, $session_id, $start, $len) = @_;
1492 :    
1493 :     #
1494 :     # This is now easy; just run thru the saved assignments and return.
1495 :     #
1496 :    
1497 :     my(%session_info);
1498 :    
1499 :     my $spool_dir = "$FIG_Config::temp/p2p_spool/$session_id";
1500 :    
1501 :     -d $spool_dir or die "Invalid session id $session_id";
1502 :    
1503 :     #
1504 :     # Read in the cached information for this session.
1505 :     #
1506 :    
1507 :     open(my $info_fh, "<$spool_dir/INFO") or die "Cannot open INFO file: $!";
1508 :     while (<$info_fh>)
1509 :     {
1510 :     chomp;
1511 :     my($var, $val) = split(/\t/, $_, 2);
1512 :     $session_info{$var} = $val;
1513 :     }
1514 :     close($info_fh);
1515 :    
1516 :     #
1517 :     # Sanity check start and length.
1518 :     #
1519 :    
1520 :     if ($start < 0 or $start >= $session_info{num_assignments})
1521 :     {
1522 :     die "Invalid start position $start";
1523 :     }
1524 :    
1525 :     if ($len < 0 or ($start + $len - 1) >= $session_info{num_assignments})
1526 :     {
1527 :     die "Invalid length $len";
1528 :     }
1529 :    
1530 :     #
1531 :     # Open file, spin to the starting line, then start reading.
1532 :     #
1533 :    
1534 :     open(my $assign_fh, "<$spool_dir/assignments") or die "Cannot open assignments file: $!";
1535 :    
1536 :     my $assign_output = [];
1537 :    
1538 :     my $assign_num = 0;
1539 :    
1540 :     while (<$assign_fh>)
1541 :     {
1542 :     next if ($assign_num < $start);
1543 :    
1544 :     last if ($assign_num > ($start + $len));
1545 :    
1546 :     chomp;
1547 :    
1548 :     my($id, $date, $author, $func) = split(/\t/, $_, 4);
1549 :    
1550 :     push(@$assign_output, [$id, $date, $author, $func]);
1551 :     }
1552 :     continue
1553 :     {
1554 :     $assign_num++;
1555 :     }
1556 :    
1557 :     return $assign_output;
1558 :     }
1559 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3