[Bio] / FigKernelPackages / P2P.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/P2P.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.18, Wed Jan 5 16:19:44 2005 UTC revision 1.19, Wed Jan 5 22:10:06 2005 UTC
# Line 18  Line 18 
18    
19  use FIG_Config;  use FIG_Config;
20    
21    use DB_File;
22    use Fcntl;
23    
24  use strict;  use strict;
25  use Exporter;  use Exporter;
26  use base qw(Exporter);  use base qw(Exporter);
# Line 49  Line 52 
52    
53  sub perform_update  sub perform_update
54  {  {
55      my($fig, $peer, $last_update) = @_;      my($fig, $peer, $last_update, $skip_tough_search) = @_;
56    
57      my $ret = $peer->request_update($last_update);      my $ret = $peer->request_update($last_update);
58    
# Line 65  Line 68 
68      print "                num_pegs=$num_pegs num_genomes=$num_genomes target_time=$target_time compat=$compatible\n";      print "                num_pegs=$num_pegs num_genomes=$num_genomes target_time=$target_time compat=$compatible\n";
69    
70      #      #
71        # We now know the data release for our peer.
72        #
73        # Open up the peg translation cache database (a DB_File) tied
74        # to %peg_cache. We needn't worry about keeping it in a directory
75        # based on our current release, as it the cache directory is kept *in*
76        # the current data release directory.
77        #
78    
79        my $cache_handle;
80        my %peg_cache;
81        if ($target_release->[1] ne "")
82        {
83            my $cache_file = "pegcache.$target_release->[1].db";
84            my $cache_dir = "$FIG_Config::data/P2PQueue";
85            $fig->verify_dir($cache_dir);
86    
87            $cache_handle = tie(%peg_cache, "DB_File", "$cache_dir/$cache_file",
88                                O_CREAT | O_RDWR, 0666, $DB_HASH);
89            $cache_handle or warn "Could not tie peg_cache to $cache_dir/$cache_file: $!\n";
90        }
91    
92        #
93      # We have  the information now to begin the update process. Retrieve the pegs.      # We have  the information now to begin the update process. Retrieve the pegs.
94      #      #
95    
# Line 97  Line 122 
122          elsif ($key eq 'peg_info')          elsif ($key eq 'peg_info')
123          {          {
124              #              #
125              # Peg id not directly usable.              # Peg id not directly usable. See if we have it in the cache.
126                #
127    
128                if ((my $cached = $peg_cache{$peg}) ne "")
129                {
130                    #
131                    # Cool, we've cached the result. Use it.
132              #              #
133    
134                    $peg_mapping{$peg} = $cached;
135                    warn "Found cached mapping $peg => $cached\n";
136                    next;
137                }
138    
139              my($alias_list, $genome_id) = @rest;              my($alias_list, $genome_id) = @rest;
140    
141              for my $alias (@$alias_list)              for my $alias (@$alias_list)
# Line 109  Line 145 
145                  {                  {
146                      print "$peg maps to $mapped via $alias\n";                      print "$peg maps to $mapped via $alias\n";
147                      $peg_mapping{$peg}= $mapped;                      $peg_mapping{$peg}= $mapped;
148                        $peg_cache{$peg} = $mapped;
149                      last;                      last;
150                  }                  }
151              }              }
# Line 127  Line 164 
164          }          }
165      }      }
166    
167        $cache_handle->sync();
168    
169      #      #
170      # finished first pass. Now go over the per-genome mappings that need to be made.      # finished first pass. Now go over the per-genome mappings that need to be made.
171      #      #
# Line 182  Line 221 
221      # If we need to finalize, make the call.      # If we need to finalize, make the call.
222      if (@finalize_req)      if (@finalize_req)
223      {      {
224          print Dumper(\@finalize_req);          # print Dumper(\@finalize_req);
225          $ret = $peer->finalize_pegs($session, \@finalize_req);          $ret = $peer->finalize_pegs($session, \@finalize_req);
226    
227          if (!$ret or ref($ret) ne "ARRAY")          if (!$ret or ref($ret) ne "ARRAY")
# Line 233  Line 272 
272                          my(@ids) = map { $_->[0] } @$res;                          my(@ids) = map { $_->[0] } @$res;
273                          my $id = $ids[0];                          my $id = $ids[0];
274                          $peg_mapping{$peg} = $id;                          $peg_mapping{$peg} = $id;
275                            $peg_cache{$peg} = $id;
276                          print "Mapped $peg to $id via contigs\n";                          print "Mapped $peg to $id via contigs\n";
277                          if (@$res > 1)                          if (@$res > 1)
278                          {                          {
# Line 266  Line 306 
306          # Now see if we need to do a tough search.          # Now see if we need to do a tough search.
307          #          #
308    
309          if (keys(%sought) > 0)          if (keys(%sought) > 0 and !$skip_tough_search)
310          {          {
311              my %trans;              my %trans;
312    
# Line 278  Line 318 
318              {              {
319                  print "  $tpeg -> $ttrans\n";                  print "  $tpeg -> $ttrans\n";
320                  $peg_mapping{$tpeg} = $ttrans;                  $peg_mapping{$tpeg} = $ttrans;
321                    $peg_cache{$tpeg} = $ttrans;
322              }              }
323          }          }
324      }      }
325        $cache_handle->sync();
326        untie %peg_cache;
327    
328        #
329        # Retrieve the assignments.
330        #
331    
332        my $assignments = $peer->get_assignments($session, 0, $num_assignments);
333    
334      #      #
335      # Retrieve the annotations, and generate a list of mapped annotations.      # Retrieve the annotations, and generate a list of mapped annotations.
336      #      #
337    
338      my $annos = $peer->get_annotations($session, 0, $num_annos > 10 ? 10 : $num_annos);      my $annos = $peer->get_annotations($session, 0, $num_annos);
339    
340      #      #
341      # Create a list of locally-mapped annotations on a per-genome      # Create a list of locally-mapped annotations on a per-genome
# Line 295  Line 344 
344    
345      my %genome_annos;      my %genome_annos;
346    
347        #
348        # %genome_assignments is a hash mapping from genome to a hashref
349        # that maps  peg to function (since assignments are unique).
350        #
351        # (Hm. Unless two remote pegs map to the same local peg; unclear what to do
352        # then. Punt for now).
353        #
354        my %genome_assignments;
355    
356      for my $anno (@$annos)      for my $anno (@$annos)
357      {      {
358          my($his_id, $ts, $author, $anno) = @$anno;          my($his_id, $ts, $author, $anno) = @$anno;
# Line 307  Line 365 
365          push(@{$genome_annos{$genome}}, [$my_id, $ts, $author, $anno]);          push(@{$genome_annos{$genome}}, [$my_id, $ts, $author, $anno]);
366      }      }
367    
368      print Dumper(\%genome_annos);      #
369        # Do the same for the assignments
370        #
371    
372        for my $assign (@$assignments)
373        {
374            my($his_id, $ts, $author, $func) = @$assign;
375    
376            my $my_id = $peg_mapping{$his_id};
377            next unless $my_id;
378    
379            my $genome = $fig->genome_of($my_id);
380    
381            $genome_assignments{$genome}->{$my_id} =  [$my_id, $ts, $author, $func];
382    
383    
384        }
385    
386        # print Dumper(\%genome_annos);
387    
388      #      #
389      # Now install annotations.      # Now install annotations.
# Line 315  Line 391 
391    
392      for my $genome (keys(%genome_annos))      for my $genome (keys(%genome_annos))
393      {      {
394            #
395            # Plan:  Apply the merge_annotations.pl logic. Read the annotations
396            # from the per-org annotations file, add the new ones here, sort, and remove duplicates.
397            # Write the results to the annotations file.
398            #
399            # When we are all done, rerun the index_annotations script.
400            #
401            # Why not do that incrementally? Partly because the annotation_seeks table doesn't
402            # have a column for the genome id, so a removal of old data would require a
403            # string-match query; since a complete reindex of the annotations is pretty
404            # fast (60 sec on a G4 laptop on a firewire disk), it's not clear whether the incremental
405            # update would actually be a win.
406            #
407    
408            my @annos = @{$genome_annos{$genome}};
409            my $assignments = $genome_assignments{$genome};
410            #
411            # %assignment_annos is a hash from peg to the list
412            # of annotations for that peg.
413            #
414            my %assignment_annos;
415    
416            my $dir = "$FIG_Config::organisms/$genome";
417            my $anno_file = "$dir/annotations";
418            my $anno_bak = "$dir/annotations." . time;
419    
420            my $new_count = @annos;
421    
422            #
423            # Rename the annotations file to a new name based on the current time.
424            #
425    
426            if (-f $anno_file)
427            {
428                rename($anno_file, $anno_bak) or die "Cannot rename $anno_file to $anno_bak: $!";
429            }
430    
431            if (open(my $fh, "<$anno_bak"))
432            {
433                #
434                # While we are scanning here, we look for the latest local assignment
435                # for any peg for which we are installing an assignment.
436                #
437                local($/) = "\n//\n";
438    
439                my($chunk, $peg, $ts, $author, $anno);
440    
441                while (defined($chunk = <$fh>))
442                {
443                    chomp $chunk;
444                    ($peg, $ts, $author, $anno) = split(/\n/, $chunk, 4);
445    
446                    if ($peg =~ /^fig\|/ and $ts =~ /^\d+$/)
447                    {
448                        my $ent = [$peg, $ts, $author, $anno];
449                        push(@annos, $ent);
450    
451                        if (defined($assignments->{$peg}))
452                        {
453                            #
454                            # We have an incoming assignment for this peg.
455                            # Don't parse anything yet, but push the annotation
456                            # on a list so we can sort by date.
457                            #
458                            push(@{$assignment_annos{$peg}}, $ent);
459                        }
460                    }
461                }
462                close($fh);
463            }
464    
465            #
466            # Determine if we are going to install an assignment.
467            #
468    
469            for my $peg (keys %$assignments)
470            {
471                my(undef, $ts, $author, $func) = $assignments->{$peg};
472    
473                #
474                # Sort the existing annotations for this peg by date.#
475                #
476    
477                my @eannos = sort { $b->[1] <=> $a->[1] } @{$assignment_annos{$peg}};
478    
479                print "Assignment annos for $peg: ", Dumper(\@eannos);
480    
481            }
482    
483            open(my $outfh, ">$anno_file") or die "Cannot open new annotation file $anno_file: $!\n";
484    
485            my $last;
486            my @sorted = sort { ($a->[0] cmp $b->[0]) or ($a->[1] <=> $b->[1]) } @annos;
487            my $inst = 0;
488            my $dup = 0;
489            foreach my $ann (@sorted)
490            {
491                my $txt = join("\n", @$ann);
492                #
493                # Drop the trailing \n if there is one; we  will add it back when we print and
494                # want to ensure the file format remains sane.
495                #
496                chomp $txt;
497                if ($txt ne $last)
498                {
499                    print $outfh "$txt\n//\n";
500                    $last = $txt;
501                    print "Inst $ann->[0] $ann->[1] $ann->[2]\n";
502                    $inst++;
503                }
504                else
505                {
506                    print "Dup $ann->[0] $ann->[1] $ann->[2]\n";
507                    $dup++;
508                }
509            }
510            close($outfh);
511            chmod(0666, $anno_file) or warn "Cannot chmod 0666 $anno_file: $!\n";
512            print "Wrote $anno_file. $new_count new annos, $inst installed, $dup duplicates\n";
513    
514            #
515          # _install_genome_annos($fig, $genome, $genome_annos{$genome});          # _install_genome_annos($fig, $genome, $genome_annos{$genome});
516      }      }
517  }  }
# Line 557  Line 754 
754      return $self->call("get_annotations", $session_id, $start, $length);      return $self->call("get_annotations", $session_id, $start, $length);
755  }  }
756    
757    sub get_assignments
758    {
759        my($self, $session_id, $start, $length) = @_;
760    
761        return $self->call("get_assignments", $session_id, $start, $length);
762    }
763    
764  sub call  sub call
765  {  {
766      my($self, $func, @args) = @_;      my($self, $func, @args) = @_;
# Line 1074  Line 1278 
1278    
1279      return $anno_output;      return $anno_output;
1280  }  }
1281    
1282    sub get_assignments
1283    {
1284        my($self, $session_id, $start, $len) = @_;
1285    
1286        #
1287        # This is now easy; just run thru the saved assignments and return.
1288        #
1289    
1290        my(%session_info);
1291    
1292        my $spool_dir = "$FIG_Config::temp/p2p_spool/$session_id";
1293    
1294        -d $spool_dir or die "Invalid session id $session_id";
1295    
1296        #
1297        # Read in the cached information for this session.
1298        #
1299    
1300        open(my $info_fh, "<$spool_dir/INFO") or die "Cannot open INFO file: $!";
1301        while (<$info_fh>)
1302        {
1303            chomp;
1304            my($var, $val) = split(/\t/, $_, 2);
1305            $session_info{$var} = $val;
1306        }
1307        close($info_fh);
1308    
1309        #
1310        # Sanity check start and length.
1311        #
1312    
1313        if ($start < 0 or $start >= $session_info{num_assignments})
1314        {
1315            die "Invalid start position $start";
1316        }
1317    
1318        if ($len < 0 or ($start + $len - 1) >= $session_info{num_assignments})
1319        {
1320            die "Invalid length $len";
1321        }
1322    
1323        #
1324        # Open file, spin to the starting line, then start reading.
1325        #
1326    
1327        open(my $assign_fh, "<$spool_dir/assignments") or die "Cannot open assignments file: $!";
1328    
1329        my $assign_output = [];
1330    
1331        my $assign_num = 0;
1332    
1333        while (<$assign_fh>)
1334        {
1335            next if ($assign_num < $start);
1336    
1337            last if ($assign_num > ($start + $len));
1338    
1339            chomp;
1340    
1341            my($id, $date, $author, $func) = split(/\t/, $_, 4);
1342    
1343            push(@$assign_output, [$id, $date, $author, $func]);
1344        }
1345        continue
1346        {
1347            $assign_num++;
1348        }
1349    
1350        return $assign_output;
1351    }
1352    

Legend:
Removed from v.1.18  
changed lines
  Added in v.1.19

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3