[Bio] / FigKernelPackages / P2P.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/P2P.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.15, Mon Oct 4 15:56:29 2004 UTC revision 1.26, Tue Jan 11 14:06:18 2005 UTC
# Line 18  Line 18 
18    
19  use FIG_Config;  use FIG_Config;
20    
21    use AnyDBM_File;
22    use Fcntl;
23    
24  use strict;  use strict;
25  use Exporter;  use Exporter;
26  use base qw(Exporter);  use base qw(Exporter);
# Line 33  Line 36 
36  our $ns_p2p = "http://thefig.info/schemas/p2p_update";  our $ns_p2p = "http://thefig.info/schemas/p2p_update";
37  our $ns_relay = "http://thefig.info/schemas/p2p_relay";  our $ns_relay = "http://thefig.info/schemas/p2p_relay";
38    
39    my $peg_batch_size = 1000;
40    my $anno_batch_size = 1000;
41    my $assign_batch_size = 1000;
42    my $fin_batch_size = 1000;
43    
44    my $log_fh;
45    my $html_fh;
46    
47  =pod  =pod
48    
49  =head1 perform_update($peer)  =head1 perform_update($peer, $last_update, $skip_tough_search, $update_thru, $log_file, $html_file, $assignment_policy))
50    
51  Perform a peer-to-peer update with the given peer. $peer is an instance of  Perform a peer-to-peer update with the given peer. $peer is an instance of
52  P2P::Requestor which can connect to the peer. It is expected that the  P2P::Requestor which can connect to the peer. It is expected that the
# Line 45  Line 56 
56  This code executes the high-level protocol, maintaining state between  This code executes the high-level protocol, maintaining state between
57  calls to the peer to exchange the actual information.  calls to the peer to exchange the actual information.
58    
59        $last_update: Search for updates since this time.
60        $skip_tough_search: Do not use the time-consuming $fig->tough_search method as a last resort for peg mapping.
61        $update_thru: Search for updates until this time. Undef means to search for all since $last_update.
62        $log_file: Write logging information to this file.
63        $html_file: Write a HTML summary to this file.
64        $assignment_policy: If a list reference, contains the list of users from which we will accept assignments. If a code ref, a predicate that is passed ($peg, $timestamp, $author, $function) and returns true if the assignment should be made.
65    
66  =cut  =cut
67    
68  sub perform_update  sub perform_update
69  {  {
70      my($fig, $peer, $last_update) = @_;      my($fig, $peer, $last_update, $skip_tough_search, $update_thru, $log_file, $html_file,
71           $assignment_policy) = @_;
72    
73        my $allow_assignment;
74    
75        $log_file = "/dev/null" unless $log_file ne "";
76        open($log_fh, ">>$log_file") or die "Cannot open logfile $log_file: $!\n";
77        $log_fh->autoflush(1);
78    
79        $html_file = "/dev/null" unless $html_file ne "";
80        open($html_fh, ">$html_file") or die "Cannot open htmlfile $html_file: $!\n";
81        $html_fh->autoflush(1);
82    
83        if (ref($assignment_policy) eq "CODE")
84        {
85            $allow_assignment = $assignment_policy;
86        }
87        elsif (ref($assignment_policy) eq "ARRAY")
88        {
89            my $ahash = {};
90            map { $ahash->{$_}++; } @$assignment_policy;
91            $allow_assignment = sub {
92                return $ahash->{$_[2]};
93            };
94        }
95        elsif (ref($assignment_policy) eq "HASH")
96        {
97            $allow_assignment = sub {
98                return $assignment_policy->{$_[2]};
99            };
100        }
101        else
102        {
103            print $log_fh "Invalid assignment policy $assignment_policy\n";
104            die "Invalid assignment policy $assignment_policy\n";
105        }
106    
107        my $now = localtime();
108        my $last_str = localtime($last_update);
109        print $html_fh <<END;
110    <h1>P2P Update at $now</h1>
111    Peer URL $peer->{url}<br>
112    Update from: $last_str<br>
113    END
114    
115        print $log_fh "Beginning P2P update at $now\n";
116        print $log_fh "  Peer URL: $peer->{url}\n";
117        print $log_fh "  Update from: $last_str\n";
118        print $log_fh "\n";
119    
120      my $ret = $peer->request_update($last_update);      my $ret = $peer->request_update($last_update, $update_thru);
121    
122      if (!$ret or ref($ret) ne "ARRAY")      if (!$ret or ref($ret) ne "ARRAY")
123      {      {
124          die "perform_update: request_updated failed\n";          die "perform_update: request_update failed\n";
125      }      }
126    
127      my($session, $target_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,      my($session, $target_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,
128         $target_time, $compatible) = @$ret;         $target_time, $compatible) = @$ret;
129    
130      print "perform_update: session=$session target=$target_release num_annos=$num_annos\n";      print "perform_update: session=$session target=@$target_release num_annos=$num_annos\n";
131      print "                num_pegs=$num_pegs num_genomes=$num_genomes target_time=$target_time compat=$compatible\n";      print "                num_pegs=$num_pegs num_genomes=$num_genomes target_time=$target_time compat=$compatible\n";
132    
133        my @my_release = $fig->get_release_info();
134    
135        print $log_fh "Session id = $session\n";
136        print $log_fh "Target release information: \n\t", join("\n\t", @$target_release), "\n";
137        print $log_fh "My release information: \n\t", join("\n\t", @my_release), "\n";
138        print $log_fh "$num_annos annotations\n";
139        print $log_fh "$num_assignments assignments\n";
140        print $log_fh "$num_pegs pegs\n";
141    
142        print $html_fh "Session id = $session<br>\n";
143        print $html_fh "Target release information: <br>\n\t", join("<br>\n\t", @$target_release), "<br>\n";
144        print $html_fh "My release information: <br>\n\t", join("<br>\n\t", @my_release), "<br>\n";
145        print $html_fh "$num_annos annotations<br>\n";
146        print $html_fh "$num_assignments assignments<br>\n";
147        print $html_fh "$num_pegs pegs<br>\n";
148    
149        #
150        # We now know the data release for our peer.
151        #
152        # Open up the peg translation cache database (a AnyDBM_File) tied
153        # to %peg_cache. We needn't worry about keeping it in a directory
154        # based on our current release, as it the cache directory is kept *in*
155        # the current data release directory.
156        #
157    
158        my $cache_handle;
159        my %peg_cache;
160        if ($target_release->[1] ne "")
161        {
162            my $cache_file = "pegcache.$target_release->[1].db";
163            my $cache_dir = "$FIG_Config::data/P2PQueue";
164            $fig->verify_dir($cache_dir);
165    
166            $cache_handle = tie(%peg_cache, "AnyDBM_File", "$cache_dir/$cache_file",
167                                O_CREAT | O_RDWR, 0666);
168            $cache_handle or warn "Could not tie peg_cache to $cache_dir/$cache_file: $!\n";
169        }
170    
171        #
172        # peg_mapping is the local mapping from remote->local peg. This might
173        # be replacable by peg_cache from above.
174        #
175        my %peg_mapping;
176    
177    
178      #      #
179      # We have  the information now to begin the update process. Retrieve the pegs.      # We have  the information now to begin the update process. Retrieve the pegs.
180      #      #
181    
182      $ret = $peer->get_pegs($session, 0, $num_pegs);      _compute_peg_mapping($fig, $peer, $session, $num_pegs, \%peg_mapping, \%peg_cache, $cache_handle,
183                             $skip_tough_search);
184    
185      if (!$ret or ref($ret) ne "ARRAY")      eval { $cache_handle->sync();};
186        untie %peg_cache;
187    
188        #
189        # Create a list of locally-mapped annotations on a per-genome
190        # basis.
191        #
192    
193        my %genome_annos;
194    
195        #
196        # %genome_assignments is a hash mapping from genome to a hashref
197        # that maps  peg to function (since assignments are unique).
198        #
199        # (Hm. Unless two remote pegs map to the same local peg; unclear what to do
200        # then. Punt for now).
201        #
202        my %genome_assignments;
203    
204        #
205        # Retrieve the annotations, and generate a list of mapped annotations.
206        #
207    
208        for (my $anno_start = 0; $anno_start < $num_annos; $anno_start += $anno_batch_size)
209      {      {
210          die "perform_update: get_pegs failed\n";          my $anno_req_len = $num_annos - $anno_start;
211            $anno_req_len = $anno_batch_size if $anno_req_len > $anno_batch_size;
212    
213            print "Retrieve $anno_req_len annos at $anno_start\n";
214            print $log_fh "Retrieve $anno_req_len annos at $anno_start\n";
215    
216            my $annos = $peer->get_annotations($session, $anno_start, $anno_req_len);
217    
218            for my $anno (@$annos)
219            {
220                my($his_id, $ts, $author, $anno) = @$anno;
221    
222                my $my_id = $peg_mapping{$his_id};
223                next unless $my_id;
224    
225                my $genome = $fig->genome_of($my_id);
226    
227                push(@{$genome_annos{$genome}}, [$my_id, $ts, $author, $anno]);
228            }
229      }      }
230    
231      my($peg_list, $genome_list) = @$ret;      #
232        # Do the same for the assignments
233        #
234    
235        # print Dumper($assignments);
236    
237    
238        for (my $assign_start = 0; $assign_start < $num_assignments; $assign_start += $assign_batch_size)
239        {
240            my $assign_req_len = $num_assignments - $assign_start;
241            $assign_req_len = $assign_batch_size if $assign_req_len > $assign_batch_size;
242    
243            print "Retrieve $assign_req_len assigns at $assign_start\n";
244            print $log_fh "Retrieve $assign_req_len assigns at $assign_start\n";
245    
246            my $assignments = $peer->get_assignments($session, $assign_start, $assign_req_len);
247    
248            for my $assign (@$assignments)
249            {
250                my($his_id, $ts, $author, $func) = @$assign;
251    
252                my $my_id = $peg_mapping{$his_id};
253                next unless $my_id;
254    
255                my $genome = $fig->genome_of($my_id);
256    
257                $genome_assignments{$genome}->{$my_id} =  [$my_id, $ts, $author, $func];
258            }
259        }
260    
261        # print Dumper(\%genome_annos);
262    
263      #      #
264      # Walk the peg-list to and generate @pegs_to_finalize.      # Now install annotations.
265      #      #
266    
267      my(%peg_mapping, %genome_map );      for my $genome (keys(%genome_annos))
268        {
269            #
270            # Plan:  Apply the merge_annotations.pl logic. Read the annotations
271            # from the per-org annotations file, add the new ones here, sort, and remove duplicates.
272            # Write the results to the annotations file.
273            #
274            # When we are all done, rerun the index_annotations script.
275            #
276            # Why not do that incrementally? Partly because the annotation_seeks table doesn't
277            # have a column for the genome id, so a removal of old data would require a
278            # string-match query; since a complete reindex of the annotations is pretty
279            # fast (60 sec on a G4 laptop on a firewire disk), it's not clear whether the incremental
280            # update would actually be a win.
281            #
282    
283      for my $peg_info (@$peg_list)          my @annos = @{$genome_annos{$genome}};
284            my $assignments = $genome_assignments{$genome};
285            #
286            # %assignment_annos is a hash from peg to the list
287            # of annotations for that peg.
288            #
289            my %assignment_annos;
290    
291            my $dir = "$FIG_Config::organisms/$genome";
292            my $anno_file = "$dir/annotations";
293            my $anno_bak = "$dir/annotations." . time;
294    
295            my $new_count = @annos;
296    
297            #
298            # Rename the annotations file to a new name based on the current time.
299            #
300    
301            my $gs = $fig->genus_species($genome);
302            print $html_fh "<h1>Updates for $genome ($gs)</h1>\n";
303    
304            if (-f $anno_file)
305      {      {
306          my($key, $peg, @rest) = @$peg_info;              rename($anno_file, $anno_bak) or die "Cannot rename $anno_file to $anno_bak: $!";
307                print $log_fh "Moved annotations file $anno_file to backup $anno_bak\n";
308            }
309    
310          if ($key eq 'peg')          if (open(my $fh, "<$anno_bak"))
311          {          {
312              #              #
313              # Peg id is directly usable.              # While we are scanning here, we look for the latest local assignment
314                # for any peg for which we are installing an assignment.
315                #
316                local($/) = "\n//\n";
317    
318                my($chunk, $peg, $ts, $author, $anno);
319    
320                while (defined($chunk = <$fh>))
321                {
322                    chomp $chunk;
323                    ($peg, $ts, $author, $anno) = split(/\n/, $chunk, 4);
324    
325                    if ($peg =~ /^fig\|/ and $ts =~ /^\d+$/)
326                    {
327                        #
328                        # The last field marks this as an "old" annotation, so we don't
329                        # log its installation later.
330              #              #
331              $peg_mapping{$peg} = $peg;                      my $ent = [$peg, $ts, $author, $anno, 1];
332                        push(@annos, $ent);
333    
334                        if (defined($assignments->{$peg}))
335                        {
336                            #
337                            # We have an incoming assignment for this peg.
338                            # Don't parse anything yet, but push the annotation
339                            # on a list so we can sort by date.
340                            #
341                            push(@{$assignment_annos{$peg}}, $ent);
342          }          }
343          elsif ($key eq 'peg_info')                  }
344                }
345                close($fh);
346            }
347    
348            #
349            # Determine if we are going to install an assignment.
350            #
351    
352            my $cgi_url = &FIG::cgi_url();
353            print $html_fh "<h2>Assignments made</h2>\n";
354            print $html_fh "<table border=\"1\">\n";
355            print $html_fh "<tr><th>PEG</th><th>Old assignment</th><th>New assignment</th><tr>\n";
356    
357            for my $peg (keys %$assignments)
358          {          {
359                my(undef, $ts, $author, $func) = @{$assignments->{$peg}};
360    
361              #              #
362              # Peg id not directly usable.              # Sort the existing annotations for this peg by date.
363                #
364                # Recall that this list has entries [$peg, $timestamp, $author, $anno, $old_flag]
365              #              #
366    
367              my($alias_list, $genome_id) = @rest;              my @eannos;
368                if (ref($assignment_annos{$peg}))
369                {
370                    @eannos = sort { $b->[1] <=> $a->[1] } @{$assignment_annos{$peg}};
371                }
372                else
373                {
374                    #
375                    # No assignment annotations found.
376                    #
377                    @eannos = ();
378                }
379    
380              for my $alias (@$alias_list)              # print "Assignment annos for $peg: ", Dumper(\@eannos);
381    
382                #
383                # Filter out just the master assignments that are newer than
384                # the one we are contemplating putting in place.
385                #
386    
387                my @cand = grep {
388                    ($_->[1] > $ts) and ($_->[3] =~ /Set master function to/)
389                    } @eannos;
390    
391                if (@cand > 0)
392              {              {
393                  my $mapped = $fig->by_alias($alias);                  #
394                  if ($mapped)                  # Here is were some policy needs to be put in place --
395                    # we have a more recent annotation on the current system.
396                    #
397                    # For now, we will not install an assignment if there is any
398                    # newer assignment in place.
399                    #
400    
401                    warn "Skipping assignment for $peg $func due to more recent assignment $cand[0]->[3]\n";
402                    print $log_fh "Skipping assignment for $peg $func due to more recent assignment $cand[0]->[3]\n";
403                }
404                else
405                  {                  {
406                      print "$peg maps to $mapped via $alias\n";                  #
407                      $peg_mapping{$peg}= $mapped;                  # Nothing is blocking us. While we are testing, just slam this assignment in.
408                      last;                  #
409    
410                    my $old = $fig->function_of($peg, 'master');
411    
412                    if ($old ne $func and &$allow_assignment($peg, $ts, $author, $func))
413                    {
414                        my $l = "$cgi_url/protein.cgi?prot=$peg";
415                        print $html_fh "<tr><td><a href=\"$l\">$peg</a></td><td>$old</td><td>$func</td></tr>\n";
416    
417                        print "Assign $peg $func\n";
418                        print $log_fh "Assign $peg $func\n";
419                        print $log_fh "   was $old\n";
420                        $fig->assign_function($peg, 'master', $func);
421    
422                    }
423                  }                  }
424              }              }
425    
426            print $html_fh "</table>\n";
427    
428            print $html_fh "<h2>Annotations added</h2>\n";
429            print $html_fh "<table border=\"1\">\n";
430            print $html_fh "<tr><th>PEG</th><th>Time</th><th>Author</th><th>Annotation</th></tr>\n";
431    
432            open(my $outfh, ">$anno_file") or die "Cannot open new annotation file $anno_file: $!\n";
433    
434            my $last;
435            my @sorted = sort { ($a->[0] cmp $b->[0]) or ($a->[1] <=> $b->[1]) } @annos;
436            my $inst = 0;
437            my $dup = 0;
438            foreach my $ann (@sorted)
439            {
440                my $txt = join("\n", @$ann);
441              #              #
442              # If we didn't succeed in mapping by alias,              # Drop the trailing \n if there is one; we  will add it back when we print and
443              # stash this in the list of pegs to be mapped by              # want to ensure the file format remains sane.
             # genome.  
444              #              #
445                chomp $txt;
446                if ($txt ne $last)
447                {
448                    my $peg = $ann->[0];
449                    my $l = "$cgi_url/protein.cgi?prot=$peg";
450                    if (!$ann->[4])
451                    {
452                        print $html_fh "<tr>" . join("\n", map { "<td>$_</td>" }
453                                                     "<a href=\"$l\">$peg</a>",
454                                                     scalar(localtime($ann->[1])), $ann->[2], $ann->[3])
455                            . "</tr>\n";
456                    }
457    
458              if (!defined($peg_mapping{$peg}))                  print $outfh "$txt\n//\n";
459                    $last = $txt;
460                    # print "Inst $ann->[0] $ann->[1] $ann->[2]\n";
461                    $inst++;
462                }
463                else
464              {              {
465                  push(@{$genome_map{$genome_id}}, $peg);                  # print "Dup $ann->[0] $ann->[1] $ann->[2]\n";
466                  print "$peg did not map\n";                  $dup++;
467                }
468              }              }
469            print $html_fh "</table>\n";
470            close($outfh);
471            chmod(0666, $anno_file) or warn "Cannot chmod 0666 $anno_file: $!\n";
472            print "Wrote $anno_file. $new_count new annos, $inst installed, $dup duplicates\n";
473            print $log_fh "Wrote $anno_file. $new_count new annos, $inst installed, $dup duplicates\n";
474        }
475        close($html_fh);
476    }
477    
478    #
479    # Compute the peg mapping for a session.
480    #
481    # $fig          Active FIG instance
482    # $peer         P2P peer for this session.
483    # $session      P2P session ID
484    # $peg_mapping  Hash ref for the remote -> local PEG mapping
485    # $peg_cache    Hash ref for the persistent remote -> local PEG mapping cache db.
486    # $cache_handle AnyDBM_File handle corresponding to $peg_cache.
487    #
488    sub _compute_peg_mapping
489    {
490        my($fig, $peer, $session, $num_pegs, $peg_mapping, $peg_cache, $cache_handle, $skip_tough_search) = @_;
491    
492        #
493        # genome_map is a hash mapping from target genome id to a list of
494        # pegs on the target. This is used to construct a finalize_pegs request after
495        # the first phase of peg mapping.
496        #
497    
498        my %genome_map;
499    
500        #
501        # target_genome_info is a hash mapping from target genome
502        # identifier to the target-side information on the genome -
503        # number of contigs, number of nucleotides, checksum.
504        #
505        # We accumulate it here across possibly multiple batches of
506        # peg retrievals in order to create a single  finalization
507        # list.
508        #
509    
510        my %target_genome_info;
511    
512        #
513        # For very large transfers, we need to batch the peg processing.
514        #
515    
516        for (my $peg_start = 0; $peg_start < $num_pegs; $peg_start += $peg_batch_size)
517        {
518            my $peg_req_len = $num_pegs - $peg_start;
519            $peg_req_len = $peg_batch_size if $peg_req_len > $peg_batch_size;
520    
521            print "Getting $peg_req_len pegs at $peg_start\n";
522            print $log_fh "Getting $peg_req_len pegs at $peg_start\n";
523            my $ret = $peer->get_pegs($session, $peg_start, $peg_req_len);
524    
525            if (!$ret or ref($ret) ne "ARRAY")
526            {
527                die "perform_update: get_pegs failed\n";
528            }
529    
530            my($peg_list, $genome_list) = @$ret;
531    
532            for my $gent (@$genome_list)
533            {
534                $target_genome_info{$gent->[0]} = $gent;
535          }          }
536    
537            _compute_peg_mapping_batch($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
538                                       $peg_list, \%genome_map);
539      }      }
540    
541      #      #
542      # finished first pass. Now go over the per-genome mappings that need to be made.      # We have finished first pass. Now go over the per-genome mappings that need to be made.
543      #      #
544      # $genome_map{$genome_id} is a list of pegs that reside on that genome.      # $genome_map{$genome_id} is a list of pegs that reside on that genome.
545      # the pegs and genome id are both target-based identifiers.      # The pegs and genome id are both target-based identifiers.
546        #
547        # %target_genome_info defines the list of genome information we have on the remote
548        # side.
549        #
550        # We build a request to be passed to finalize_pegs. Each entry in the request is either
551        # ['peg_genome', $peg] which means that we have a genome that corresponds to the
552        # genome the peg is in. We can attempt to map via contig locations.
553        #
554        # If that is not the case,  we pass a request entry of ['peg_unknown', $peg]
555        # which will result in the sequence data being returned.
556      #      #
557    
558      my @finalize_req = ();      my @finalize_req = ();
559    
560        #
561        # local_genome maps a target peg identifier to the local genome id it translates to.
562        #
563      my %local_genome;      my %local_genome;
564    
565      for my $genome_info (@$genome_list)      for my $genome (keys(%target_genome_info))
566      {      {
567          my($genome, $n_contigs, $n_nucs, $cksum) = @$genome_info;          my($tg, $n_contigs, $n_nucs, $cksum) = @{$target_genome_info{$genome}};
568    
569            $tg eq $genome or die "Invalid entry in target_genome_info for $genome => $tg, $n_contigs, $n_nucs, $cksum";
570    
571            #
572            # Don't bother unless we have any pegs to look up.
573            #
574          next unless defined($genome_map{$genome});          next unless defined($genome_map{$genome});
575    
576          #          #
# Line 161  Line 591 
591              #              #
592    
593              print "$genome mapped to $my_genome\n";              print "$genome mapped to $my_genome\n";
594                print $log_fh "$genome mapped to $my_genome\n";
595              for my $peg (@$pegs)              for my $peg (@$pegs)
596              {              {
597                  push(@finalize_req, ['peg_genome', $peg]);                  push(@finalize_req, ['peg_genome', $peg]);
# Line 179  Line 610 
610      }      }
611    
612      #      #
613      # If we need to finalize, make the call.      # We've built our finalization request. Handle it (possibly with batching here too).
614      if (@finalize_req)      #
615    
616        _process_finalization_request($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
617                                     \%local_genome, \@finalize_req, $skip_tough_search);
618    
619    }
620    
621    #
622    # Process one batch of PEGs.
623    #
624    # Same args as _compute_peg_mapping, with the addition of:
625    #
626    #       $peg_list       List of pegs to be processed
627    #       $genome_map     Hash maintaining list of genomes with their pegs.
628    #       $target_genome_info     Hash maintaining overall list of target-side genome information.
629    #
630    sub _compute_peg_mapping_batch
631    {
632        my($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
633           $peg_list, $genome_map, $target_genome_info) = @_;
634    
635        #
636        # Walk the list of pegs as returned from get_pegs() and determine what has to
637        # be done.
638        #
639        # If the entry is ['peg', $peg], we can use the peg ID as is.
640        #
641        # If the entry is ['peg_info', $peg, $alias_list, $genome], the peg
642        # has the given aliases, and is in the given genome.
643        #
644        for my $peg_info (@$peg_list)
645        {
646            my($key, $peg, @rest) = @$peg_info;
647    
648            if ($key eq 'peg')
649            {
650                #
651                # Peg id is directly usable.
652                #
653                $peg_mapping->{$peg} = $peg;
654            }
655            elsif ($key eq 'peg_info')
656            {
657                #
658                # Peg id not directly usable. See if we have it in the cache.
659                #
660    
661                if ((my $cached = $peg_cache->{$peg}) ne "")
662                {
663                    #
664                    # Cool, we've cached the result. Use it.
665                    #
666    
667                    $peg_mapping->{$peg} = $cached;
668                    # warn "Found cached mapping $peg => $cached\n";
669                    next;
670                }
671    
672                #
673                # It is not cached. Attempt to resolve by means of alias IDs.
674                #
675    
676                my($alias_list, $genome_id) = @rest;
677    
678                for my $alias (@$alias_list)
679                {
680                    my $mapped = $fig->by_alias($alias);
681                    if ($mapped)
682                    {
683                        print "$peg maps to $mapped via $alias\n";
684                        print $log_fh "$peg maps to $mapped via $alias\n";
685                        $peg_mapping->{$peg}= $mapped;
686                        $peg_cache->{$peg} = $mapped;
687                        last;
688                    }
689                }
690    
691                #
692                # If we weren't able to resolve by ID,
693                # add to %genome_map as a PEG that will need
694                # to be resolved by means of contig location.
695                #
696    
697                if (!defined($peg_mapping->{$peg}))
698                {
699                    push(@{$genome_map->{$genome_id}}, $peg);
700                    print "$peg did not map on first pass\n";
701                    print $log_fh "$peg did not map on first pass\n";
702                }
703            }
704        }
705    
706        #
707        # Flush the cache to write out any computed mappings.
708        #
709        eval { $cache_handle->sync();};
710    
711    }
712    
713    sub _process_finalization_request
714      {      {
715          print Dumper(\@finalize_req);      my($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
716          $ret = $peer->finalize_pegs($session, \@finalize_req);         $local_genome, $finalize_req, $skip_tough_search) = @_;
717    
718        #
719        # Immediately return unless there's something to do.
720        #
721        return unless ref($finalize_req) and @$finalize_req > 0;
722    
723        while (@$finalize_req > 0)
724        {
725            my @req = splice(@$finalize_req, 0, $fin_batch_size);
726    
727            print "Invoking finalize_pegs on ", int(@req), " pegs\n";
728            print $log_fh "Invoking finalize_pegs on ", int(@req), " pegs\n";
729            my $ret = $peer->finalize_pegs($session, \@req);
730    
731          if (!$ret or ref($ret) ne "ARRAY")          if (!$ret or ref($ret) ne "ARRAY")
732          {          {
# Line 212  Line 755 
755                  # here, and see if it maps to something.                  # here, and see if it maps to something.
756                  #                  #
757    
758                  my $my_genome = $local_genome{$peg};                  my $my_genome = $local_genome->{$peg};
759                  my $local_contig = $fig->find_contig_with_checksum($my_genome, $cksum);                  my $local_contig = $fig->find_contig_with_checksum($my_genome, $cksum);
760                  if ($local_contig)                  if ($local_contig)
761                  {                  {
# Line 232  Line 775 
775                      {                      {
776                          my(@ids) = map { $_->[0] } @$res;                          my(@ids) = map { $_->[0] } @$res;
777                          my $id = $ids[0];                          my $id = $ids[0];
778                          $peg_mapping{$peg} = $id;                          $peg_mapping->{$peg} = $id;
779                            $peg_cache->{$peg} = $id;
780                          print "Mapped $peg to $id via contigs\n";                          print "Mapped $peg to $id via contigs\n";
781                          if (@$res > 1)                          if (@$res > 1)
782                          {                          {
783                              warn "Multiple mappings found for $peg: @ids\n";                              warn "Multiple mappings found for $peg: @ids\n";
784                                print $log_fh "Multiple mappings found for $peg: @ids\n";
785                          }                          }
786                      }                      }
787                      else                      else
788                      {                      {
789                          print "failed: $peg  $my_genome and contig $local_contig start=$start end=$end strand=$strand\n";                          print "failed: $peg  $my_genome and contig $local_contig start=$start end=$end strand=$strand\n";
790                            print $log_fh "failed: $peg  $my_genome and contig $local_contig start=$start end=$end strand=$strand\n";
791                            print $html_fh "Contig match failed: $peg $my_genome contig $local_contig start $start end $end strand $strand<br>\n";
792                          $sought{$peg}++;                          $sought{$peg}++;
793                          $sought_seq{$peg} = $seq;                          $sought_seq{$peg} = $seq;
794                      }                      }
# Line 249  Line 796 
796                  else                  else
797                  {                  {
798                      print "Mapping failed for $my_genome checksum $cksum\n";                      print "Mapping failed for $my_genome checksum $cksum\n";
799                        print $log_fh "Mapping failed for $my_genome checksum $cksum\n";
800                        print $html_fh "Mapping failed for $my_genome checksum $cksum<br>\n";
801                      $sought{$peg}++;                      $sought{$peg}++;
802                      $sought_seq{$peg} = $seq;                      $sought_seq{$peg} = $seq;
803                  }                  }
# Line 266  Line 815 
815          # Now see if we need to do a tough search.          # Now see if we need to do a tough search.
816          #          #
817    
818          if (keys(%sought) > 0)          if (keys(%sought) > 0 and !$skip_tough_search)
819          {          {
820              my %trans;              my %trans;
821    
822              print "Starting tough search\n";              print "Starting tough search\n";
823                print $log_fh "Starting tough search\n";
824    
825              $fig->tough_search(undef, \%sought_seq, \%trans, \%sought);              $fig->tough_search(undef, \%sought_seq, \%trans, \%sought);
826              print "Tough search translated: \n";              print "Tough search translated: \n";
827                print $log_fh "Tough search translated: \n";
828              while (my($tpeg, $ttrans) = each(%trans))              while (my($tpeg, $ttrans) = each(%trans))
829              {              {
830                  print "  $tpeg -> $ttrans\n";                  print "  $tpeg -> $ttrans\n";
831                  $peg_mapping{$tpeg} = $ttrans;                  print $log_fh "  $tpeg -> $ttrans\n";
832              }                  $peg_mapping->{$tpeg} = $ttrans;
833                    $peg_cache->{$tpeg} = $ttrans;
834          }          }
835      }      }
   
     #  
     # Retrieve the annotations, and generate a list of mapped annotations.  
     #  
   
     my $annos = $peer->get_annotations($session, 0, $num_annos > 10 ? 10 : $num_annos);  
   
     #  
     # Create a list of locally-mapped annotations on a per-genome  
     # basis.  
     #  
   
     my %genome_annos;  
   
     for my $anno (@$annos)  
     {  
         my($his_id, $ts, $author, $anno) = @$anno;  
   
         my $my_id = $peg_mapping{$his_id};  
         next unless $my_id;  
   
         my $genome = $fig->genome_of($my_id);  
   
         push(@{$genome_annos{$genome}}, [$my_id, $ts, $author, $anno]);  
     }  
   
     print Dumper(\%genome_annos);  
   
     #  
     # Now install annotations.  
     #  
   
     for my $genome (keys(%genome_annos))  
     {  
         # _install_genome_annos($fig, $genome, $genome_annos{$genome});  
836      }      }
837  }  }
838    
   
   
839  #############  #############
840  #  #
841  # P2P Relay  # P2P Relay
# Line 340  Line 855 
855  {  {
856      my($class, $url) = @_;      my($class, $url) = @_;
857    
858      my $proxy = SOAP::Lite->uri($P2P::ns_relay)->proxy($url);      my $creds = [];
859    
860        my $proxy = SOAP::Lite->uri($P2P::ns_relay)->proxy([$url,
861                                                            credentials => $creds]);
862    
863      my $self = {      my $self = {
864          url => $url,          url => $url,
# Line 413  Line 931 
931          # element in the body of the message.          # element in the body of the message.
932          #          #
933          my $ns = $reply->namespaceuriof('/Envelope/Body/[1]');          my $ns = $reply->namespaceuriof('/Envelope/Body/[1]');
934          print "Reply ns=$ns want $P2P::ns_relay\n";          # print "Reply ns=$ns want $P2P::ns_relay\n";
935    
936          if ($ns eq $P2P::ns_relay)          if ($ns eq $P2P::ns_relay)
937          {          {
938              my $val = $reply->result;              my $val = $reply->result;
939              print "got val=", Dumper($val);              # print "got val=", Dumper($val);
940              if ($val->[0] eq 'deferred')              if ($val->[0] eq 'deferred')
941              {              {
942                  #                  #
# Line 474  Line 992 
992    
993  sub new  sub new
994  {  {
995      my($class, $fig, $url, $peer_id, $relay) = @_;      my($class, $fig, $url, $peer_id, $relay, $credentials) = @_;
996    
997        $credentials = [] unless ref($credentials);
998    
999      my $proxy = SOAP::Lite->uri($ns_p2p)->proxy($url);      my $proxy = SOAP::Lite->uri($ns_p2p)->proxy($url, timeout => 3600,
1000                                                    credentials => $credentials);
1001    
1002      my $self = {      my $self = {
1003          fig => $fig,          fig => $fig,
# Line 502  Line 1023 
1023    
1024  sub request_update  sub request_update
1025  {  {
1026      my($self, $last_update) = @_;      my($self, $last_update, $update_thru) = @_;
1027    
1028      my $rel = $self->{fig}->get_release_info();      my $rel = [$self->{fig}->get_release_info()];
1029    
1030      if (!defined($last_update))      if (!defined($last_update))
1031      {      {
1032          $last_update = $self->{fig}->get_peer_last_update($self->{peer_id});          $last_update = $self->{fig}->get_peer_last_update($self->{peer_id});
1033      }      }
1034    
1035      my $reply = $self->{proxy}->request_update($rel, $last_update);      print "Requesting update via $self->{proxy}\n";
1036        my $reply = $self->{proxy}->request_update($rel, $last_update, $update_thru);
1037        # print "Got reply ", Dumper($reply);
1038    
1039      if ($self->{relay})      if ($self->{relay})
1040      {      {
# Line 555  Line 1078 
1078      return $self->call("get_annotations", $session_id, $start, $length);      return $self->call("get_annotations", $session_id, $start, $length);
1079  }  }
1080    
1081    sub get_assignments
1082    {
1083        my($self, $session_id, $start, $length) = @_;
1084    
1085        return $self->call("get_assignments", $session_id, $start, $length);
1086    }
1087    
1088  sub call  sub call
1089  {  {
1090      my($self, $func, @args) = @_;      my($self, $func, @args) = @_;
# Line 603  Line 1133 
1133    
1134  sub request_update  sub request_update
1135  {  {
1136      my($class, $his_release, $last_update)= @_;      my($class, $his_release, $last_update, $update_thru)= @_;
1137    
1138      #      #
1139      # Verify input.      # Verify input.
# Line 614  Line 1144 
1144          die "request_update: last_update must be a number (not '$last_update')\n";          die "request_update: last_update must be a number (not '$last_update')\n";
1145      }      }
1146    
1147        if ($update_thru eq "")
1148        {
1149            $update_thru = time + 10000;
1150        }
1151    
1152      #      #
1153      # Create a new session id and a spool directory to use for storage      # Create a new session id and a spool directory to use for storage
1154      # of information about it. This can go in the tempdir since it is      # of information about it. This can go in the tempdir since it is
# Line 621  Line 1156 
1156      #      #
1157    
1158      &FIG::verify_dir("$FIG_Config::temp/p2p_spool");      &FIG::verify_dir("$FIG_Config::temp/p2p_spool");
1159      #my $spool_dir = tempdir(DIR  => "$FIG_Config::temp/p2p_spool");      my $spool_dir = tempdir(DIR  => "$FIG_Config::temp/p2p_spool");
1160    
1161      my $spool_dir = "$FIG_Config::temp/p2p_spool/test";      #my $spool_dir = "$FIG_Config::temp/p2p_spool/test";
1162      &FIG::verify_dir($spool_dir);      &FIG::verify_dir($spool_dir);
1163    
1164      my $session_id = basename($spool_dir);      my $session_id = basename($spool_dir);
# Line 666  Line 1201 
1201      my $assign_fh;      my $assign_fh;
1202      open($assign_fh, ">$spool_dir/assignments");      open($assign_fh, ">$spool_dir/assignments");
1203    
1204        #
1205        # We originally used a query to get the PEGs that needed to have annotations
1206        # sent. Unfortunately, this performed very poorly due to all of the resultant
1207        # seeking around in the annotations files.
1208        #
1209        # The code below just runs through all of the anno files looking for annos.
1210        #
1211        # A better way to do this would be to do a query to retrieve the genome id's for
1212        # genomes that have updates. The problem here is that the annotation_seeks
1213        # table doesn't have an explicit genome field.
1214        #
1215        # Surprisingly, to me anyway, the following query appers to run quickly, in both
1216        # postgres and mysql:
1217        #
1218        # SELECT distinct(substring(fid from 5 for position('.peg.' in fid) - 5))
1219        # FROM annotation_seeks
1220        # WHERE dateof > some-date.
1221        #
1222        # The output of that can be parsed to get the genome id and just those
1223        # annotations files searched.
1224        #
1225    
1226      for my $genome (@$all_genomes)      for my $genome (@$all_genomes)
1227      {      {
1228          my $num_annos_for_genome = 0;          my $num_annos_for_genome = 0;
# Line 686  Line 1243 
1243    
1244                  if ((($fid, $anno_time, $who, $anno_text) =                  if ((($fid, $anno_time, $who, $anno_text) =
1245                       ($ann =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\n(.*\S)/s)) and                       ($ann =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\n(.*\S)/s)) and
1246                      $anno_time > $last_update)                      $anno_time > $last_update and
1247                        $anno_time < $update_thru)
1248    
1249                  {                  {
1250                      #                      #
# Line 800  Line 1358 
1358      # Check compatibility.      # Check compatibility.
1359      #      #
1360    
1361      my $my_release = $fig->get_release_info();      my $my_release = [$fig->get_release_info()];
1362      my $compatible = (defined($my_release) && ($my_release == $his_release)) ? 1 : 0;  
1363        #
1364        # Release id is $my_release->[1].
1365        #
1366    
1367        my $compatible;
1368        if ($my_release->[1] ne "" and $his_release->[1] ne "")
1369        {
1370            #
1371            # Both releases must be defined for them to be compatible.
1372            #
1373            # At some point we need to consider the derived-release issue.
1374            #
1375    
1376            $compatible = $my_release->[1] eq $his_release->[1];
1377        }
1378        else
1379        {
1380            $compatible = 0;
1381        }
1382    
1383      open(my $fh, ">$spool_dir/INFO");      open(my $fh, ">$spool_dir/INFO");
1384      print $fh "requestor_release\t$his_release\n";      print $fh "requestor_release\t$his_release\n";
1385      print $fh "last_update\t$last_update\n";      print $fh "last_update\t$last_update\n";
1386        print $fh "update_thru\t$update_thru\n";
1387      print $fh "cur_update\t$now\n";      print $fh "cur_update\t$now\n";
1388      print $fh "target_release\t$my_release\n";      print $fh "target_release\t$my_release\n";
1389      print $fh "compatible\t$compatible\n";      print $fh "compatible\t$compatible\n";
# Line 820  Line 1398 
1398      #      #
1399    
1400      my @users = keys(%users);      my @users = keys(%users);
1401      $fig->ensure_users(\@users);      # $fig->ensure_users(\@users);
1402    
1403      return [$session_id, $my_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,      return [$session_id, $my_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,
1404              $now, $compatible, $users];              $now, $compatible, \@users];
1405  }  }
1406    
1407    
# Line 1053  Line 1631 
1631    
1632      return $anno_output;      return $anno_output;
1633  }  }
1634    
1635    sub get_assignments
1636    {
1637        my($self, $session_id, $start, $len) = @_;
1638    
1639        #
1640        # This is now easy; just run thru the saved assignments and return.
1641        #
1642    
1643        my(%session_info);
1644    
1645        my $spool_dir = "$FIG_Config::temp/p2p_spool/$session_id";
1646    
1647        -d $spool_dir or die "Invalid session id $session_id";
1648    
1649        #
1650        # Read in the cached information for this session.
1651        #
1652    
1653        open(my $info_fh, "<$spool_dir/INFO") or die "Cannot open INFO file: $!";
1654        while (<$info_fh>)
1655        {
1656            chomp;
1657            my($var, $val) = split(/\t/, $_, 2);
1658            $session_info{$var} = $val;
1659        }
1660        close($info_fh);
1661    
1662        #
1663        # Sanity check start and length.
1664        #
1665    
1666        if ($start < 0 or $start >= $session_info{num_assignments})
1667        {
1668            die "Invalid start position $start";
1669        }
1670    
1671        if ($len < 0 or ($start + $len - 1) >= $session_info{num_assignments})
1672        {
1673            die "Invalid length $len";
1674        }
1675    
1676        #
1677        # Open file, spin to the starting line, then start reading.
1678        #
1679    
1680        open(my $assign_fh, "<$spool_dir/assignments") or die "Cannot open assignments file: $!";
1681    
1682        my $assign_output = [];
1683    
1684        my $assign_num = 0;
1685    
1686        while (<$assign_fh>)
1687        {
1688            next if ($assign_num < $start);
1689    
1690            last if ($assign_num > ($start + $len));
1691    
1692            chomp;
1693    
1694            my($id, $date, $author, $func) = split(/\t/, $_, 4);
1695    
1696            push(@$assign_output, [$id, $date, $author, $func]);
1697        }
1698        continue
1699        {
1700            $assign_num++;
1701        }
1702    
1703        return $assign_output;
1704    }
1705    
1706    1;

Legend:
Removed from v.1.15  
changed lines
  Added in v.1.26

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3