[Bio] / FigKernelPackages / P2P.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/P2P.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.11, Fri Sep 24 19:48:17 2004 UTC revision 1.24, Mon Jan 10 13:15:00 2005 UTC
# Line 18  Line 18 
18    
19  use FIG_Config;  use FIG_Config;
20    
21    use AnyDBM_File;
22    use Fcntl;
23    
24  use strict;  use strict;
25  use Exporter;  use Exporter;
26  use base qw(Exporter);  use base qw(Exporter);
27    
28    use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
29    
30  use Data::Dumper;  use Data::Dumper;
31    
32  use vars qw(@EXPORT @EXPORT_OK);  use vars qw(@EXPORT @EXPORT_OK);
# Line 31  Line 36 
36  our $ns_p2p = "http://thefig.info/schemas/p2p_update";  our $ns_p2p = "http://thefig.info/schemas/p2p_update";
37  our $ns_relay = "http://thefig.info/schemas/p2p_relay";  our $ns_relay = "http://thefig.info/schemas/p2p_relay";
38    
39    my $peg_batch_size = 1000;
40    my $anno_batch_size = 1000;
41    my $assign_batch_size = 1000;
42    my $fin_batch_size = 1000;
43    
44    my $log_fh;
45    my $html_fh;
46    
47  =pod  =pod
48    
49  =head1 perform_update($peer)  =head1 perform_update($peer)
# Line 47  Line 60 
60    
61  sub perform_update  sub perform_update
62  {  {
63      my($fig, $peer, $last_update) = @_;      my($fig, $peer, $last_update, $skip_tough_search, $update_thru, $log_file, $html_file) = @_;
64    
65      my $ret = $peer->request_update($last_update);      $log_file = "/dev/null" unless $log_file ne "";
66        open($log_fh, ">>$log_file") or die "Cannot open logfile $log_file: $!\n";
67        $log_fh->autoflush(1);
68    
69        $html_file = "/dev/null" unless $html_file ne "";
70        open($html_fh, ">$html_file") or die "Cannot open htmlfile $html_file: $!\n";
71        $html_fh->autoflush(1);
72    
73        my $now = localtime();
74        my $last_str = localtime($last_update);
75        print $html_fh <<END;
76    <h1>P2P Update at $now</h1>
77    Peer URL $peer->{url}<br>
78    Update from: $last_str<br>
79    END
80    
81        print $log_fh "Beginning P2P update at $now\n";
82        print $log_fh "  Peer URL: $peer->{url}\n";
83        print $log_fh "  Update from: $last_str\n";
84        print $log_fh "\n";
85    
86        my $ret = $peer->request_update($last_update, $update_thru);
87    
88      if (!$ret or ref($ret) ne "ARRAY")      if (!$ret or ref($ret) ne "ARRAY")
89      {      {
90          die "perform_update: request_updated failed\n";          die "perform_update: request_update failed\n";
91      }      }
92    
93      my($session, $target_release, $num_annos, $num_pegs, $num_genomes,      my($session, $target_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,
94         $target_time, $compatible) = @$ret;         $target_time, $compatible) = @$ret;
95    
96      print "perform_update: session=$session target=$target_release num_annos=$num_annos\n";      print "perform_update: session=$session target=@$target_release num_annos=$num_annos\n";
97      print "                num_pegs=$num_pegs num_genomes=$num_genomes target_time=$target_time compat=$compatible\n";      print "                num_pegs=$num_pegs num_genomes=$num_genomes target_time=$target_time compat=$compatible\n";
98    
99        my @my_release = $fig->get_release_info();
100    
101        print $log_fh "Session id = $session\n";
102        print $log_fh "Target release information: \n\t", join("\n\t", @$target_release), "\n";
103        print $log_fh "My release information: \n\t", join("\n\t", @my_release), "\n";
104        print $log_fh "$num_annos annotations\n";
105        print $log_fh "$num_assignments assignments\n";
106        print $log_fh "$num_pegs pegs\n";
107    
108        print $html_fh "Session id = $session<br>\n";
109        print $html_fh "Target release information: <br>\n\t", join("<br>\n\t", @$target_release), "<br>\n";
110        print $html_fh "My release information: <br>\n\t", join("<br>\n\t", @my_release), "<br>\n";
111        print $html_fh "$num_annos annotations<br>\n";
112        print $html_fh "$num_assignments assignments<br>\n";
113        print $html_fh "$num_pegs pegs<br>\n";
114    
115        #
116        # We now know the data release for our peer.
117        #
118        # Open up the peg translation cache database (a AnyDBM_File) tied
119        # to %peg_cache. We needn't worry about keeping it in a directory
120        # based on our current release, as it the cache directory is kept *in*
121        # the current data release directory.
122        #
123    
124        my $cache_handle;
125        my %peg_cache;
126        if ($target_release->[1] ne "")
127        {
128            my $cache_file = "pegcache.$target_release->[1].db";
129            my $cache_dir = "$FIG_Config::data/P2PQueue";
130            $fig->verify_dir($cache_dir);
131    
132            $cache_handle = tie(%peg_cache, "AnyDBM_File", "$cache_dir/$cache_file",
133                                O_CREAT | O_RDWR, 0666);
134            $cache_handle or warn "Could not tie peg_cache to $cache_dir/$cache_file: $!\n";
135        }
136    
137        #
138        # peg_mapping is the local mapping from remote->local peg. This might
139        # be replacable by peg_cache from above.
140        #
141        my %peg_mapping;
142    
143    
144      #      #
145      # We have  the information now to begin the update process. Retrieve the pegs.      # We have  the information now to begin the update process. Retrieve the pegs.
146      #      #
147    
148      $ret = $peer->get_pegs($session, 0, $num_pegs);      _compute_peg_mapping($fig, $peer, $session, $num_pegs, \%peg_mapping, \%peg_cache, $cache_handle,
149                             $skip_tough_search);
150    
151      if (!$ret or ref($ret) ne "ARRAY")      eval { $cache_handle->sync();};
152        untie %peg_cache;
153    
154        #
155        # Create a list of locally-mapped annotations on a per-genome
156        # basis.
157        #
158    
159        my %genome_annos;
160    
161        #
162        # %genome_assignments is a hash mapping from genome to a hashref
163        # that maps  peg to function (since assignments are unique).
164        #
165        # (Hm. Unless two remote pegs map to the same local peg; unclear what to do
166        # then. Punt for now).
167        #
168        my %genome_assignments;
169    
170        #
171        # Retrieve the annotations, and generate a list of mapped annotations.
172        #
173    
174        for (my $anno_start = 0; $anno_start < $num_annos; $anno_start += $anno_batch_size)
175      {      {
176          die "perform_update: get_pegs failed\n";          my $anno_req_len = $num_annos - $anno_start;
177            $anno_req_len = $anno_batch_size if $anno_req_len > $anno_batch_size;
178    
179            print "Retrieve $anno_req_len annos at $anno_start\n";
180            print $log_fh "Retrieve $anno_req_len annos at $anno_start\n";
181    
182            my $annos = $peer->get_annotations($session, $anno_start, $anno_req_len);
183    
184            for my $anno (@$annos)
185            {
186                my($his_id, $ts, $author, $anno) = @$anno;
187    
188                my $my_id = $peg_mapping{$his_id};
189                next unless $my_id;
190    
191                my $genome = $fig->genome_of($my_id);
192    
193                push(@{$genome_annos{$genome}}, [$my_id, $ts, $author, $anno]);
194            }
195      }      }
196    
197      my($peg_list, $genome_list) = @$ret;      #
198        # Do the same for the assignments
199        #
200    
201        # print Dumper($assignments);
202    
203    
204        for (my $assign_start = 0; $assign_start < $num_assignments; $assign_start += $assign_batch_size)
205        {
206            my $assign_req_len = $num_assignments - $assign_start;
207            $assign_req_len = $assign_batch_size if $assign_req_len > $assign_batch_size;
208    
209            print "Retrieve $assign_req_len assigns at $assign_start\n";
210            print $log_fh "Retrieve $assign_req_len assigns at $assign_start\n";
211    
212            my $assignments = $peer->get_assignments($session, $assign_start, $assign_req_len);
213    
214            for my $assign (@$assignments)
215            {
216                my($his_id, $ts, $author, $func) = @$assign;
217    
218                my $my_id = $peg_mapping{$his_id};
219                next unless $my_id;
220    
221                my $genome = $fig->genome_of($my_id);
222    
223                $genome_assignments{$genome}->{$my_id} =  [$my_id, $ts, $author, $func];
224            }
225        }
226    
227        # print Dumper(\%genome_annos);
228    
229        #
230        # Now install annotations.
231        #
232    
233        for my $genome (keys(%genome_annos))
234        {
235            #
236            # Plan:  Apply the merge_annotations.pl logic. Read the annotations
237            # from the per-org annotations file, add the new ones here, sort, and remove duplicates.
238            # Write the results to the annotations file.
239            #
240            # When we are all done, rerun the index_annotations script.
241            #
242            # Why not do that incrementally? Partly because the annotation_seeks table doesn't
243            # have a column for the genome id, so a removal of old data would require a
244            # string-match query; since a complete reindex of the annotations is pretty
245            # fast (60 sec on a G4 laptop on a firewire disk), it's not clear whether the incremental
246            # update would actually be a win.
247            #
248    
249            my @annos = @{$genome_annos{$genome}};
250            my $assignments = $genome_assignments{$genome};
251      #      #
252      # Walk the peg-list to and generate @pegs_to_finalize.          # %assignment_annos is a hash from peg to the list
253            # of annotations for that peg.
254      #      #
255            my %assignment_annos;
256    
257      my(%peg_mapping, %genome_map );          my $dir = "$FIG_Config::organisms/$genome";
258            my $anno_file = "$dir/annotations";
259            my $anno_bak = "$dir/annotations." . time;
260    
261      for my $peg_info (@$peg_list)          my $new_count = @annos;
262    
263            #
264            # Rename the annotations file to a new name based on the current time.
265            #
266    
267            my $gs = $fig->genus_species($genome);
268            print $html_fh "<h1>Updates for $genome ($gs)</h1>\n";
269    
270            if (-f $anno_file)
271      {      {
272          my($key, $peg, @rest) = @$peg_info;              rename($anno_file, $anno_bak) or die "Cannot rename $anno_file to $anno_bak: $!";
273                print $log_fh "Moved annotations file $anno_file to backup $anno_bak\n";
274            }
275    
276          if ($key eq 'peg')          if (open(my $fh, "<$anno_bak"))
277          {          {
278              #              #
279              # Peg id is directly usable.              # While we are scanning here, we look for the latest local assignment
280                # for any peg for which we are installing an assignment.
281                #
282                local($/) = "\n//\n";
283    
284                my($chunk, $peg, $ts, $author, $anno);
285    
286                while (defined($chunk = <$fh>))
287                {
288                    chomp $chunk;
289                    ($peg, $ts, $author, $anno) = split(/\n/, $chunk, 4);
290    
291                    if ($peg =~ /^fig\|/ and $ts =~ /^\d+$/)
292                    {
293                        my $ent = [$peg, $ts, $author, $anno];
294                        push(@annos, $ent);
295    
296                        if (defined($assignments->{$peg}))
297                        {
298              #              #
299              $peg_mapping{$peg} = $peg;                          # We have an incoming assignment for this peg.
300                            # Don't parse anything yet, but push the annotation
301                            # on a list so we can sort by date.
302                            #
303                            push(@{$assignment_annos{$peg}}, $ent);
304          }          }
305          elsif ($key eq 'peg_info')                  }
306                }
307                close($fh);
308            }
309    
310            #
311            # Determine if we are going to install an assignment.
312            #
313    
314            my $cgi_url = &FIG::cgi_url();
315            print $html_fh "<h2>Assignments made</h2>\n";
316            print $html_fh "<table>\n";
317            print $html_fh "<tr><th>PEG</th><th>Old assignment</th><th>New assignment</th><tr>\n";
318    
319            for my $peg (keys %$assignments)
320            {
321                my(undef, $ts, $author, $func) = @{$assignments->{$peg}};
322    
323                #
324                # Sort the existing annotations for this peg by date.
325                #
326                # Recall that this list has entries [$peg, $timestamp, $author, $anno]
327                #
328    
329                my @eannos;
330                if (ref($assignment_annos{$peg}))
331                {
332                    @eannos = sort { $b->[1] <=> $a->[1] } @{$assignment_annos{$peg}};
333                }
334                else
335          {          {
336              #              #
337              # Peg id not directly usable.                  # No assignment annotations found.
338              #              #
339                    @eannos = ();
340                }
341    
342              my($alias_list, $genome_id) = @rest;              # print "Assignment annos for $peg: ", Dumper(\@eannos);
343    
344              for my $alias (@$alias_list)              #
345                # Filter out just the master assignments that are newer than
346                # the one we are contemplating putting in place.
347                #
348    
349                my @cand = grep {
350                    ($_->[1] > $ts) and ($_->[3] =~ /Set master function to/)
351                    } @eannos;
352    
353                if (@cand > 0)
354              {              {
355                  my $mapped = $fig->by_alias($alias);                  #
356                  if ($mapped)                  # Here is were some policy needs to be put in place --
357                    # we have a more recent annotation on the current system.
358                    #
359                    # For now, we will not install an assignment if there is any
360                    # newer assignment in place.
361                    #
362    
363                    warn "Skipping assignment for $peg $func due to more recent assignment $cand[0]->[3]\n";
364                    print $log_fh "Skipping assignment for $peg $func due to more recent assignment $cand[0]->[3]\n";
365                }
366                else
367                  {                  {
368                      print "$peg maps to $mapped via $alias\n";                  #
369                      $peg_mapping{$peg}= $mapped;                  # Nothing is blocking us. While we are testing, just slam this assignment in.
370                      last;                  #
371    
372                    my $old = $fig->function_of($peg, 'master');
373    
374                    if ($old ne $func)
375                    {
376                        my $l = "$cgi_url/protein.cgi?prot=$peg";
377                        print $html_fh "<tr><td><a href=\"$l\">$peg</a></td><td>$old</td><td>$func</td></tr>\n";
378    
379                        print "Assign $peg $func\n";
380                        print $log_fh "Assign $peg $func\n";
381                        print $log_fh "   was $old\n";
382                        $fig->assign_function($peg, 'master', $func);
383    
384                    }
385                }
386            }
387    
388            print $html_fh "</table>\n";
389    
390            print $html_fh "<h2>Annotations added</h2>\n";
391            print $html_fh "<table>\n";
392            print $html_fh "<tr><th>PEG</th><th>Time</th><th>Author</th><th>Annotation</th></tr>\n";
393    
394            open(my $outfh, ">$anno_file") or die "Cannot open new annotation file $anno_file: $!\n";
395    
396            my $last;
397            my @sorted = sort { ($a->[0] cmp $b->[0]) or ($a->[1] <=> $b->[1]) } @annos;
398            my $inst = 0;
399            my $dup = 0;
400            foreach my $ann (@sorted)
401            {
402                my $txt = join("\n", @$ann);
403                #
404                # Drop the trailing \n if there is one; we  will add it back when we print and
405                # want to ensure the file format remains sane.
406                #
407                chomp $txt;
408                if ($txt ne $last)
409                {
410                    my $peg = $ann->[0];
411                    my $l = "$cgi_url/protein.cgi?prot=$peg";
412                    print $html_fh "<tr>" . join("\n", map { "<td>$_</td>" }
413                                                 "<a href=\"$l\">$peg</a>",
414                                                 scalar(localtime($ann->[1])), $ann->[2], $ann->[3])
415                        . "</tr>\n";
416    
417                    print $outfh "$txt\n//\n";
418                    $last = $txt;
419                    # print "Inst $ann->[0] $ann->[1] $ann->[2]\n";
420                    $inst++;
421                }
422                else
423                {
424                    # print "Dup $ann->[0] $ann->[1] $ann->[2]\n";
425                    $dup++;
426                }
427            }
428            print $html_fh "</table>\n";
429            close($outfh);
430            chmod(0666, $anno_file) or warn "Cannot chmod 0666 $anno_file: $!\n";
431            print "Wrote $anno_file. $new_count new annos, $inst installed, $dup duplicates\n";
432            print $log_fh "Wrote $anno_file. $new_count new annos, $inst installed, $dup duplicates\n";
433                  }                  }
434        close($html_fh);
435              }              }
436    
437              #              #
438              # If we didn't succeed in mapping by alias,  # Compute the peg mapping for a session.
439              # stash this in the list of pegs to be mapped by  #
440              # genome.  # $fig          Active FIG instance
441    # $peer         P2P peer for this session.
442    # $session      P2P session ID
443    # $peg_mapping  Hash ref for the remote -> local PEG mapping
444    # $peg_cache    Hash ref for the persistent remote -> local PEG mapping cache db.
445    # $cache_handle AnyDBM_File handle corresponding to $peg_cache.
446    #
447    sub _compute_peg_mapping
448    {
449        my($fig, $peer, $session, $num_pegs, $peg_mapping, $peg_cache, $cache_handle, $skip_tough_search) = @_;
450    
451        #
452        # genome_map is a hash mapping from target genome id to a list of
453        # pegs on the target. This is used to construct a finalize_pegs request after
454        # the first phase of peg mapping.
455        #
456    
457        my %genome_map;
458    
459        #
460        # target_genome_info is a hash mapping from target genome
461        # identifier to the target-side information on the genome -
462        # number of contigs, number of nucleotides, checksum.
463        #
464        # We accumulate it here across possibly multiple batches of
465        # peg retrievals in order to create a single  finalization
466        # list.
467        #
468    
469        my %target_genome_info;
470    
471        #
472        # For very large transfers, we need to batch the peg processing.
473              #              #
474    
475              if (!defined($peg_mapping{$peg}))      for (my $peg_start = 0; $peg_start < $num_pegs; $peg_start += $peg_batch_size)
476              {              {
477                  push(@{$genome_map{$genome_id}}, $peg);          my $peg_req_len = $num_pegs - $peg_start;
478                  print "$peg did not map\n";          $peg_req_len = $peg_batch_size if $peg_req_len > $peg_batch_size;
479    
480            print "Getting $peg_req_len pegs at $peg_start\n";
481            print $log_fh "Getting $peg_req_len pegs at $peg_start\n";
482            my $ret = $peer->get_pegs($session, $peg_start, $peg_req_len);
483    
484            if (!$ret or ref($ret) ne "ARRAY")
485            {
486                die "perform_update: get_pegs failed\n";
487              }              }
488    
489            my($peg_list, $genome_list) = @$ret;
490    
491            for my $gent (@$genome_list)
492            {
493                $target_genome_info{$gent->[0]} = $gent;
494          }          }
495    
496            _compute_peg_mapping_batch($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
497                                       $peg_list, \%genome_map);
498      }      }
499    
500      #      #
501      # finished first pass. Now go over the per-genome mappings that need to be made.      # We have finished first pass. Now go over the per-genome mappings that need to be made.
502      #      #
503      # $genome_map{$genome_id} is a list of pegs that reside on that genome.      # $genome_map{$genome_id} is a list of pegs that reside on that genome.
504      # the pegs and genome id are both target-based identifiers.      # The pegs and genome id are both target-based identifiers.
505        #
506        # %target_genome_info defines the list of genome information we have on the remote
507        # side.
508        #
509        # We build a request to be passed to finalize_pegs. Each entry in the request is either
510        # ['peg_genome', $peg] which means that we have a genome that corresponds to the
511        # genome the peg is in. We can attempt to map via contig locations.
512        #
513        # If that is not the case,  we pass a request entry of ['peg_unknown', $peg]
514        # which will result in the sequence data being returned.
515      #      #
516    
517      my @finalize_req = ();      my @finalize_req = ();
518    
519        #
520        # local_genome maps a target peg identifier to the local genome id it translates to.
521        #
522      my %local_genome;      my %local_genome;
523    
524      for my $genome_info (@$genome_list)      for my $genome (keys(%target_genome_info))
525      {      {
526          my($genome, $n_contigs, $n_nucs, $cksum) = @$genome_info;          my($tg, $n_contigs, $n_nucs, $cksum) = @{$target_genome_info{$genome}};
527    
528            $tg eq $genome or die "Invalid entry in target_genome_info for $genome => $tg, $n_contigs, $n_nucs, $cksum";
529    
530            #
531            # Don't bother unless we have any pegs to look up.
532            #
533          next unless defined($genome_map{$genome});          next unless defined($genome_map{$genome});
534    
535          #          #
# Line 159  Line 550 
550              #              #
551    
552              print "$genome mapped to $my_genome\n";              print "$genome mapped to $my_genome\n";
553                print $log_fh "$genome mapped to $my_genome\n";
554              for my $peg (@$pegs)              for my $peg (@$pegs)
555              {              {
556                  push(@finalize_req, ['peg_genome', $peg]);                  push(@finalize_req, ['peg_genome', $peg]);
# Line 177  Line 569 
569      }      }
570    
571      #      #
572      # If we need to finalize, make the call.      # We've built our finalization request. Handle it (possibly with batching here too).
573      if (@finalize_req)      #
574    
575        _process_finalization_request($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
576                                     \%local_genome, \@finalize_req, $skip_tough_search);
577    
578    }
579    
580    #
581    # Process one batch of PEGs.
582    #
583    # Same args as _compute_peg_mapping, with the addition of:
584    #
585    #       $peg_list       List of pegs to be processed
586    #       $genome_map     Hash maintaining list of genomes with their pegs.
587    #       $target_genome_info     Hash maintaining overall list of target-side genome information.
588    #
589    sub _compute_peg_mapping_batch
590    {
591        my($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
592           $peg_list, $genome_map, $target_genome_info) = @_;
593    
594        #
595        # Walk the list of pegs as returned from get_pegs() and determine what has to
596        # be done.
597        #
598        # If the entry is ['peg', $peg], we can use the peg ID as is.
599        #
600        # If the entry is ['peg_info', $peg, $alias_list, $genome], the peg
601        # has the given aliases, and is in the given genome.
602        #
603        for my $peg_info (@$peg_list)
604        {
605            my($key, $peg, @rest) = @$peg_info;
606    
607            if ($key eq 'peg')
608            {
609                #
610                # Peg id is directly usable.
611                #
612                $peg_mapping->{$peg} = $peg;
613            }
614            elsif ($key eq 'peg_info')
615            {
616                #
617                # Peg id not directly usable. See if we have it in the cache.
618                #
619    
620                if ((my $cached = $peg_cache->{$peg}) ne "")
621                {
622                    #
623                    # Cool, we've cached the result. Use it.
624                    #
625    
626                    $peg_mapping->{$peg} = $cached;
627                    # warn "Found cached mapping $peg => $cached\n";
628                    next;
629                }
630    
631                #
632                # It is not cached. Attempt to resolve by means of alias IDs.
633                #
634    
635                my($alias_list, $genome_id) = @rest;
636    
637                for my $alias (@$alias_list)
638                {
639                    my $mapped = $fig->by_alias($alias);
640                    if ($mapped)
641                    {
642                        print "$peg maps to $mapped via $alias\n";
643                        print $log_fh "$peg maps to $mapped via $alias\n";
644                        $peg_mapping->{$peg}= $mapped;
645                        $peg_cache->{$peg} = $mapped;
646                        last;
647                    }
648                }
649    
650                #
651                # If we weren't able to resolve by ID,
652                # add to %genome_map as a PEG that will need
653                # to be resolved by means of contig location.
654                #
655    
656                if (!defined($peg_mapping->{$peg}))
657                {
658                    push(@{$genome_map->{$genome_id}}, $peg);
659                    print "$peg did not map on first pass\n";
660                    print $log_fh "$peg did not map on first pass\n";
661                }
662            }
663        }
664    
665        #
666        # Flush the cache to write out any computed mappings.
667        #
668        eval { $cache_handle->sync();};
669    
670    }
671    
672    sub _process_finalization_request
673    {
674        my($fig, $peer, $session, $peg_mapping, $peg_cache, $cache_handle,
675           $local_genome, $finalize_req, $skip_tough_search) = @_;
676    
677        #
678        # Immediately return unless there's something to do.
679        #
680        return unless ref($finalize_req) and @$finalize_req > 0;
681    
682        while (@$finalize_req > 0)
683      {      {
684          print Dumper(\@finalize_req);          my @req = splice(@$finalize_req, 0, $fin_batch_size);
685          $ret = $peer->finalize_pegs($session, \@finalize_req);  
686            print "Invoking finalize_pegs on ", int(@req), " pegs\n";
687            print $log_fh "Invoking finalize_pegs on ", int(@req), " pegs\n";
688            my $ret = $peer->finalize_pegs($session, \@req);
689    
690          if (!$ret or ref($ret) ne "ARRAY")          if (!$ret or ref($ret) ne "ARRAY")
691          {          {
# Line 193  Line 697 
697          # sequence data. Attempt to finish up the mapping.          # sequence data. Attempt to finish up the mapping.
698          #          #
699    
700            my(%sought, %sought_seq);
701    
702    
703          my $dbh = $fig->db_handle();          my $dbh = $fig->db_handle();
704          for my $entry (@$ret)          for my $entry (@$ret)
# Line 201  Line 707 
707    
708              if ($what eq "peg_loc")              if ($what eq "peg_loc")
709              {              {
710                  my($strand, $start, $end, $cksum) = @rest;                  my($strand, $start, $end, $cksum, $seq) = @rest;
711    
712                  #                  #
713                  # We have a contig location. Try to find a matching contig                  # We have a contig location. Try to find a matching contig
714                  # here, and see if it maps to something.                  # here, and see if it maps to something.
715                  #                  #
716    
717                  my $my_genome = $local_genome{$peg};                  my $my_genome = $local_genome->{$peg};
718                  my $local_contig = $fig->find_contig_with_checksum($my_genome, $cksum);                  my $local_contig = $fig->find_contig_with_checksum($my_genome, $cksum);
719                  if ($local_contig)                  if ($local_contig)
720                  {                  {
# Line 224  Line 730 
730                                             contig = '$local_contig'                                             contig = '$local_contig'
731                                          !);                                          !);
732    
733                      if ($res and @$res == 1)                      if ($res and @$res > 0)
734                      {                      {
735                          my($id) = $res->[0]->[0];                          my(@ids) = map { $_->[0] } @$res;
736                          $peg_mapping{$peg} = $id;                          my $id = $ids[0];
737                            $peg_mapping->{$peg} = $id;
738                            $peg_cache->{$peg} = $id;
739                          print "Mapped $peg to $id via contigs\n";                          print "Mapped $peg to $id via contigs\n";
740                            if (@$res > 1)
741                            {
742                                warn "Multiple mappings found for $peg: @ids\n";
743                                print $log_fh "Multiple mappings found for $peg: @ids\n";
744                            }
745                      }                      }
746                      else                      else
747                      {                      {
748                          print "failed: $peg  $my_genome and contig $local_contig start=$start end=$end strand=$strand\n";                          print "failed: $peg  $my_genome and contig $local_contig start=$start end=$end strand=$strand\n";
749                            print $log_fh "failed: $peg  $my_genome and contig $local_contig start=$start end=$end strand=$strand\n";
750                            print $html_fh "Contig match failed: $peg $my_genome contig $local_contig start $start end $end strand $strand<br>\n";
751                            $sought{$peg}++;
752                            $sought_seq{$peg} = $seq;
753                      }                      }
754                  }                  }
755                  else                  else
756                  {                  {
757                      print "Mapping failed for $my_genome checksum $cksum\n";                      print "Mapping failed for $my_genome checksum $cksum\n";
758                        print $log_fh "Mapping failed for $my_genome checksum $cksum\n";
759                        print $html_fh "Mapping failed for $my_genome checksum $cksum<br>\n";
760                        $sought{$peg}++;
761                        $sought_seq{$peg} = $seq;
762                  }                  }
763              }              }
764                elsif ($what eq "peg_seq")
765                {
766                    my($seq) = @rest;
767    
768                    $sought{$peg}++;
769                    $sought_seq{$peg} = $seq;
770          }          }
771      }      }
 }  
772    
773            #
774            # Now see if we need to do a tough search.
775            #
776    
777            if (keys(%sought) > 0 and !$skip_tough_search)
778            {
779                my %trans;
780    
781                print "Starting tough search\n";
782                print $log_fh "Starting tough search\n";
783    
784                $fig->tough_search(undef, \%sought_seq, \%trans, \%sought);
785                print "Tough search translated: \n";
786                print $log_fh "Tough search translated: \n";
787                while (my($tpeg, $ttrans) = each(%trans))
788                {
789                    print "  $tpeg -> $ttrans\n";
790                    print $log_fh "  $tpeg -> $ttrans\n";
791                    $peg_mapping->{$tpeg} = $ttrans;
792                    $peg_cache->{$tpeg} = $ttrans;
793                }
794            }
795        }
796    }
797    
798  #############  #############
799  #  #
# Line 264  Line 814 
814  {  {
815      my($class, $url) = @_;      my($class, $url) = @_;
816    
817      my $proxy = SOAP::Lite->uri($P2P::ns_relay)->proxy($url);      my $creds = ['seed-linux-2.uchicago.edu:80',
818                     'SEED User',
819                     annotator => 'anno4all'];
820    
821        my $proxy = SOAP::Lite->uri($P2P::ns_relay)->proxy([$url,
822                                                            credentials => $creds]);
823    
824      my $self = {      my $self = {
825          url => $url,          url => $url,
# Line 337  Line 892 
892          # element in the body of the message.          # element in the body of the message.
893          #          #
894          my $ns = $reply->namespaceuriof('/Envelope/Body/[1]');          my $ns = $reply->namespaceuriof('/Envelope/Body/[1]');
895          print "Reply ns=$ns want $P2P::ns_relay\n";          # print "Reply ns=$ns want $P2P::ns_relay\n";
896    
897          if ($ns eq $P2P::ns_relay)          if ($ns eq $P2P::ns_relay)
898          {          {
899              my $val = $reply->result;              my $val = $reply->result;
900              print "got val=", Dumper($val);              # print "got val=", Dumper($val);
901              if ($val->[0] eq 'deferred')              if ($val->[0] eq 'deferred')
902              {              {
903                  #                  #
# Line 384  Line 939 
939  use strict;  use strict;
940    
941  use Data::Dumper;  use Data::Dumper;
942    use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
943    
944  use SOAP::Lite;  use SOAP::Lite;
945    
946    #use SOAP::Lite +trace => [qw(transport dispatch result debug)];
947  use P2P;  use P2P;
948    
949  #  #
# Line 395  Line 953 
953    
954  sub new  sub new
955  {  {
956      my($class, $fig, $url, $peer_id, $relay) = @_;      my($class, $fig, $url, $peer_id, $relay, $credentials) = @_;
957    
958        $credentials = [] unless ref($credentials);
959    
960      my $proxy = SOAP::Lite->uri($ns_p2p)->proxy($url);      my $proxy = SOAP::Lite->uri($ns_p2p)->proxy($url, timeout => 3600,
961                                                    credentials => $credentials);
962    
963      my $self = {      my $self = {
964          fig => $fig,          fig => $fig,
# Line 423  Line 984 
984    
985  sub request_update  sub request_update
986  {  {
987      my($self, $last_update) = @_;      my($self, $last_update, $update_thru) = @_;
988    
989      my $rel = $self->{fig}->get_release_info();      my $rel = [$self->{fig}->get_release_info()];
990    
991      if (!defined($last_update))      if (!defined($last_update))
992      {      {
993          $last_update = $self->{fig}->get_peer_last_update($self->{peer_id});          $last_update = $self->{fig}->get_peer_last_update($self->{peer_id});
994      }      }
995    
996      my $reply = $self->{proxy}->request_update($rel, $last_update);      print "Requesting update via $self->{proxy}\n";
997        my $reply = $self->{proxy}->request_update($rel, $last_update, $update_thru);
998        # print "Got reply ", Dumper($reply);
999    
1000      if ($self->{relay})      if ($self->{relay})
1001      {      {
# Line 469  Line 1032 
1032      return $self->call("finalize_pegs", $session_id, $request);      return $self->call("finalize_pegs", $session_id, $request);
1033  }  }
1034    
1035    sub get_annotations
1036    {
1037        my($self, $session_id, $start, $length) = @_;
1038    
1039        return $self->call("get_annotations", $session_id, $start, $length);
1040    }
1041    
1042    sub get_assignments
1043    {
1044        my($self, $session_id, $start, $length) = @_;
1045    
1046        return $self->call("get_assignments", $session_id, $start, $length);
1047    }
1048    
1049  sub call  sub call
1050  {  {
1051      my($self, $func, @args) = @_;      my($self, $func, @args) = @_;
1052    
1053        my $t0 = [gettimeofday()];
1054        print "Calling $func\n";
1055      my $reply = $self->{proxy}->$func(@args);      my $reply = $self->{proxy}->$func(@args);
1056        my $t1 = [gettimeofday()];
1057    
1058        my $elap = tv_interval($t0, $t1);
1059        print "Call to $func took $elap\n";
1060    
1061      if ($self->{relay})      if ($self->{relay})
1062      {      {
# Line 511  Line 1094 
1094    
1095  sub request_update  sub request_update
1096  {  {
1097      my($class, $his_release, $last_update)= @_;      my($class, $his_release, $last_update, $update_thru)= @_;
1098    
1099      #      #
1100      # Verify input.      # Verify input.
# Line 522  Line 1105 
1105          die "request_update: last_update must be a number (not '$last_update')\n";          die "request_update: last_update must be a number (not '$last_update')\n";
1106      }      }
1107    
1108        if ($update_thru eq "")
1109        {
1110            $update_thru = time + 10000;
1111        }
1112    
1113      #      #
1114      # Create a new session id and a spool directory to use for storage      # Create a new session id and a spool directory to use for storage
1115      # of information about it. This can go in the tempdir since it is      # of information about it. This can go in the tempdir since it is
# Line 529  Line 1117 
1117      #      #
1118    
1119      &FIG::verify_dir("$FIG_Config::temp/p2p_spool");      &FIG::verify_dir("$FIG_Config::temp/p2p_spool");
1120      #my $spool_dir = tempdir(DIR  => "$FIG_Config::temp/p2p_spool");      my $spool_dir = tempdir(DIR  => "$FIG_Config::temp/p2p_spool");
1121    
1122      my $spool_dir = "$FIG_Config::temp/p2p_spool/test";      #my $spool_dir = "$FIG_Config::temp/p2p_spool/test";
1123      &FIG::verify_dir($spool_dir);      &FIG::verify_dir($spool_dir);
1124    
1125      my $session_id = basename($spool_dir);      my $session_id = basename($spool_dir);
# Line 549  Line 1137 
1137    
1138      my %pegs;      my %pegs;
1139    
1140        #
1141        # We keep track of usernames that have been seen, so that
1142        # we can both update our local user database and
1143        # we can report them to our peer.
1144        #
1145    
1146        my %users;
1147    
1148      my $num_annos = 0;      my $num_annos = 0;
1149      my $num_genomes = 0;      my $num_genomes = 0;
1150      my $num_pegs = 0;      my $num_pegs = 0;
1151        my $num_assignments = 0;
1152    
1153      my $anno_fh;      my $anno_fh;
1154      open($anno_fh, ">$spool_dir/annos");      open($anno_fh, ">$spool_dir/annos");
# Line 562  Line 1159 
1159      my $genome_fh;      my $genome_fh;
1160      open($genome_fh, ">$spool_dir/genomes");      open($genome_fh, ">$spool_dir/genomes");
1161    
1162        my $assign_fh;
1163        open($assign_fh, ">$spool_dir/assignments");
1164    
1165      for my $genome (@$all_genomes)      for my $genome (@$all_genomes)
1166      {      {
1167          my $num_annos_for_genome = 0;          my $num_annos_for_genome = 0;
1168            my %assignment;
1169    
1170          my $genome_dir = "$FIG_Config::organisms/$genome";          my $genome_dir = "$FIG_Config::organisms/$genome";
1171          next unless -d $genome_dir;          next unless -d $genome_dir;
# Line 581  Line 1182 
1182    
1183                  if ((($fid, $anno_time, $who, $anno_text) =                  if ((($fid, $anno_time, $who, $anno_text) =
1184                       ($ann =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\n(.*\S)/s)) and                       ($ann =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\n(.*\S)/s)) and
1185                      $anno_time > $last_update)                      $anno_time > $last_update and
1186                        $anno_time < $update_thru)
1187    
1188                  {                  {
1189                      #                      #
1190                        # Update users list.
1191                        #
1192    
1193                        $users{$who}++;
1194    
1195                        #
1196                      # Look up aliases if we haven't seen this fid before.                      # Look up aliases if we haven't seen this fid before.
1197                      #                      #
1198    
# Line 602  Line 1210 
1210    
1211                      $num_annos_for_genome++;                      $num_annos_for_genome++;
1212                      $num_annos++;                      $num_annos++;
1213    
1214                        #
1215                        # While we're here, see if this is an assignment. We check in the
1216                        # %assignment hash, which is keyed on fid, to see if we already
1217                        # saw an assignment for this fid. If we have, we keep this one only if
1218                        # the assignment time on it is later than the one we saw already.
1219                        #
1220                        # We are only looking at master assignments for now. We will need
1221                        # to return to this issue and reexamine it, but in order to move
1222                        # forward I am only matching master assignments.
1223                        #
1224    
1225                        if ($anno_text =~ /Set master function to\n(\S[^\n]+\S)/)
1226                        {
1227                            my $func = $1;
1228    
1229                            my $other = $assignment{$fid};
1230    
1231                            #
1232                            # If we haven't seen an assignment for this fid,
1233                            # or if it the other assignment has a timestamp that
1234                            # is earlier than this one, set the assignment.
1235                            #
1236    
1237                            if (!defined($other) or
1238                                ($other->[1] < $anno_time))
1239                            {
1240                                $assignment{$fid} = [$fid, $anno_time, $who, $func];
1241                            }
1242                        }
1243                  }                  }
1244              }              }
1245              close($afh);              close($afh);
1246    
1247                #
1248                # Write out the assignments that remain.
1249                #
1250    
1251                for my $fid (sort keys(%assignment))
1252                {
1253                    print $assign_fh join("\t", @{$assignment{$fid}}), "\n";
1254                    $num_assignments++;
1255          }          }
1256            }
1257    
1258    
1259          #          #
1260          # Determine genome information if we have annotations for this one.          # Determine genome information if we have annotations for this one.
# Line 638  Line 1287 
1287      close($anno_fh);      close($anno_fh);
1288      close($peg_fh);      close($peg_fh);
1289      close($genome_fh);      close($genome_fh);
1290        close($assign_fh);
1291    
1292      print "Pegs: $num_pegs\n";      print "Pegs: $num_pegs\n";
1293      print "Genomes: $num_genomes\n";      print "Genomes: $num_genomes\n";
# Line 647  Line 1297 
1297      # Check compatibility.      # Check compatibility.
1298      #      #
1299    
1300      my $my_release = $fig->get_release_info();      my $my_release = [$fig->get_release_info()];
1301      my $compatible = (defined($my_release) && ($my_release == $his_release)) ? 1 : 0;  
1302        #
1303        # Release id is $my_release->[1].
1304        #
1305    
1306        my $compatible;
1307        if ($my_release->[1] ne "" and $his_release->[1] ne "")
1308        {
1309            #
1310            # Both releases must be defined for them to be compatible.
1311            #
1312            # At some point we need to consider the derived-release issue.
1313            #
1314    
1315            $compatible = $my_release->[1] eq $his_release->[1];
1316        }
1317        else
1318        {
1319            $compatible = 0;
1320        }
1321    
1322      open(my $fh, ">$spool_dir/INFO");      open(my $fh, ">$spool_dir/INFO");
1323      print $fh "requestor_release\t$his_release\n";      print $fh "requestor_release\t$his_release\n";
1324      print $fh "last_update\t$last_update\n";      print $fh "last_update\t$last_update\n";
1325        print $fh "update_thru\t$update_thru\n";
1326      print $fh "cur_update\t$now\n";      print $fh "cur_update\t$now\n";
1327      print $fh "target_release\t$my_release\n";      print $fh "target_release\t$my_release\n";
1328      print $fh "compatible\t$compatible\n";      print $fh "compatible\t$compatible\n";
1329      print $fh "num_pegs\t$num_pegs\n";      print $fh "num_pegs\t$num_pegs\n";
1330      print $fh "num_genomes\t$num_genomes\n";      print $fh "num_genomes\t$num_genomes\n";
1331      print $fh "num_annos\t$num_annos\n";      print $fh "num_annos\t$num_annos\n";
1332        print $fh "num_assignments\t$num_assignments\n";
1333      close($fh);      close($fh);
1334    
1335      return [$session_id, $my_release, $num_annos, $num_pegs, $num_genomes, $now, $compatible];      #
1336        # Construct list of users, and pdate local user database.
1337        #
1338    
1339        my @users = keys(%users);
1340        # $fig->ensure_users(\@users);
1341    
1342        return [$session_id, $my_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,
1343                $now, $compatible, \@users];
1344  }  }
1345    
1346    
# Line 796  Line 1475 
1475              #              #
1476              # Return the location and contig checksum for this peg.              # Return the location and contig checksum for this peg.
1477              #              #
1478                # We also include the sequence in case the contig mapping doesn't work.
1479                #
1480    
1481              my $loc = $fig->feature_location($peg);              my $loc = $fig->feature_location($peg);
1482              my $contig = $fig->contig_of($loc);              my $contig = $fig->contig_of($loc);
1483              my $cksum = $fig->contig_checksum($fig->genome_of($peg), $contig);              my $cksum = $fig->contig_checksum($fig->genome_of($peg), $contig);
1484              warn "Checksum for '$loc' '$contig' is $cksum\n";              my $seq = $fig->get_translation($peg);
1485    
1486              push(@$out, ['peg_loc', $peg,              push(@$out, ['peg_loc', $peg,
1487                          $fig->strand_of($loc),                          $fig->strand_of($peg),
1488                          $fig->beg_of($loc), $fig->end_of($loc),                          $fig->beg_of($loc), $fig->end_of($loc),
1489                          $cksum]);                          $cksum, $seq]);
1490    
1491          }          }
1492          elsif ($what eq "peg_unknown")          elsif ($what eq "peg_unknown")
# Line 817  Line 1498 
1498      return $out;      return $out;
1499  }  }
1500    
1501    
1502    sub get_annotations
1503    {
1504        my($self, $session_id, $start, $len) = @_;
1505    
1506        #
1507        # This is now easy; just run thru the saved annotations and return.
1508        #
1509    
1510        my(%session_info);
1511    
1512        my $spool_dir = "$FIG_Config::temp/p2p_spool/$session_id";
1513    
1514        -d $spool_dir or die "Invalid session id $session_id";
1515    
1516        #
1517        # Read in the cached information for this session.
1518        #
1519    
1520        open(my $info_fh, "<$spool_dir/INFO") or die "Cannot open INFO file: $!";
1521        while (<$info_fh>)
1522        {
1523            chomp;
1524            my($var, $val) = split(/\t/, $_, 2);
1525            $session_info{$var} = $val;
1526        }
1527        close($info_fh);
1528    
1529        #
1530        # Sanity check start and length.
1531        #
1532    
1533        if ($start < 0 or $start >= $session_info{num_annos})
1534        {
1535            die "Invalid start position $start";
1536        }
1537    
1538        if ($len < 0 or ($start + $len - 1) >= $session_info{num_annos})
1539        {
1540            die "Invalid length $len";
1541        }
1542    
1543        #
1544        # Open file, spin to the starting line, then start reading.
1545        #
1546    
1547        open(my $anno_fh, "<$spool_dir/annos") or die "Cannot open annos file: $!";
1548    
1549        my $anno_output = [];
1550    
1551        my $anno_num = 0;
1552    
1553        local $/ = "//\n";
1554        while (<$anno_fh>)
1555        {
1556            next if ($anno_num < $start);
1557    
1558            last if ($anno_num > ($start + $len));
1559    
1560            chomp;
1561    
1562            my($id, $date, $author, $anno) = split(/\n/, $_, 4);
1563    
1564            push(@$anno_output, [$id, $date, $author, $anno]);
1565        }
1566        continue
1567        {
1568            $anno_num++;
1569        }
1570    
1571        return $anno_output;
1572    }
1573    
1574    sub get_assignments
1575    {
1576        my($self, $session_id, $start, $len) = @_;
1577    
1578        #
1579        # This is now easy; just run thru the saved assignments and return.
1580        #
1581    
1582        my(%session_info);
1583    
1584        my $spool_dir = "$FIG_Config::temp/p2p_spool/$session_id";
1585    
1586        -d $spool_dir or die "Invalid session id $session_id";
1587    
1588        #
1589        # Read in the cached information for this session.
1590        #
1591    
1592        open(my $info_fh, "<$spool_dir/INFO") or die "Cannot open INFO file: $!";
1593        while (<$info_fh>)
1594        {
1595            chomp;
1596            my($var, $val) = split(/\t/, $_, 2);
1597            $session_info{$var} = $val;
1598        }
1599        close($info_fh);
1600    
1601        #
1602        # Sanity check start and length.
1603        #
1604    
1605        if ($start < 0 or $start >= $session_info{num_assignments})
1606        {
1607            die "Invalid start position $start";
1608        }
1609    
1610        if ($len < 0 or ($start + $len - 1) >= $session_info{num_assignments})
1611        {
1612            die "Invalid length $len";
1613        }
1614    
1615        #
1616        # Open file, spin to the starting line, then start reading.
1617        #
1618    
1619        open(my $assign_fh, "<$spool_dir/assignments") or die "Cannot open assignments file: $!";
1620    
1621        my $assign_output = [];
1622    
1623        my $assign_num = 0;
1624    
1625        while (<$assign_fh>)
1626        {
1627            next if ($assign_num < $start);
1628    
1629            last if ($assign_num > ($start + $len));
1630    
1631            chomp;
1632    
1633            my($id, $date, $author, $func) = split(/\t/, $_, 4);
1634    
1635            push(@$assign_output, [$id, $date, $author, $func]);
1636        }
1637        continue
1638        {
1639            $assign_num++;
1640        }
1641    
1642        return $assign_output;
1643    }
1644    
1645    1;

Legend:
Removed from v.1.11  
changed lines
  Added in v.1.24

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3