[Bio] / FigKernelPackages / P2P.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/P2P.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Wed Sep 22 19:33:35 2004 UTC revision 1.18, Wed Jan 5 16:19:44 2005 UTC
# Line 22  Line 22 
22  use Exporter;  use Exporter;
23  use base qw(Exporter);  use base qw(Exporter);
24    
25    use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
26    
27  use Data::Dumper;  use Data::Dumper;
28    
29  use vars qw(@EXPORT @EXPORT_OK);  use vars qw(@EXPORT @EXPORT_OK);
# Line 53  Line 55 
55    
56      if (!$ret or ref($ret) ne "ARRAY")      if (!$ret or ref($ret) ne "ARRAY")
57      {      {
58          die "perform_update: request_updated failed\n";          die "perform_update: request_update failed\n";
59      }      }
60    
61      my($session, $target_release, $num_annos, $num_pegs, $num_genomes,      my($session, $target_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,
62         $target_time, $compatible) = @$ret;         $target_time, $compatible) = @$ret;
63    
64      print "perform_update: session=$session target=$target_release num_annos=$num_annos\n";      print "perform_update: session=$session target=@$target_release num_annos=$num_annos\n";
65      print "                num_pegs=$num_pegs num_genomes=$num_genomes target_time=$target_time compat=$compatible\n";      print "                num_pegs=$num_pegs num_genomes=$num_genomes target_time=$target_time compat=$compatible\n";
66    
67      #      #
# Line 90  Line 92 
92              #              #
93              # Peg id is directly usable.              # Peg id is directly usable.
94              #              #
95                $peg_mapping{$peg} = $peg;
96          }          }
97          elsif ($key eq 'peg_info')          elsif ($key eq 'peg_info')
98          {          {
# Line 102  Line 105 
105              for my $alias (@$alias_list)              for my $alias (@$alias_list)
106              {              {
107                  my $mapped = $fig->by_alias($alias);                  my $mapped = $fig->by_alias($alias);
108                  if ($mapped && $peg !~ /5$/)                  if ($mapped)
109                  {                  {
110                      print "$peg maps to $mapped via $alias\n";                      print "$peg maps to $mapped via $alias\n";
111                      $peg_mapping{$peg}= $mapped;                      $peg_mapping{$peg}= $mapped;
# Line 119  Line 122 
122              if (!defined($peg_mapping{$peg}))              if (!defined($peg_mapping{$peg}))
123              {              {
124                  push(@{$genome_map{$genome_id}}, $peg);                  push(@{$genome_map{$genome_id}}, $peg);
125                    print "$peg did not map\n";
126              }              }
127          }          }
128      }      }
# Line 126  Line 130 
130      #      #
131      # finished first pass. Now go over the per-genome mappings that need to be made.      # finished first pass. Now go over the per-genome mappings that need to be made.
132      #      #
133        # $genome_map{$genome_id} is a list of pegs that reside on that genome.
134        # the pegs and genome id are both target-based identifiers.
135        #
136    
137        my @finalize_req = ();
138        my %local_genome;
139    
140      for my $genome_info (@$genome_list)      for my $genome_info (@$genome_list)
141      {      {
142          my($genome, $n_contigs, $n_nucs, $cksum) = @$genome_info;          my($genome, $n_contigs, $n_nucs, $cksum) = @$genome_info;
143    
144          next unless $genome_map{$genome};          next unless defined($genome_map{$genome});
145    
146            #
147            # Determine if we have a local genome installed that matches precisely the
148            # genome on the target side.
149            #
150          my $my_genome = $fig->find_genome_by_content($genome, $n_contigs, $n_nucs, $cksum);          my $my_genome = $fig->find_genome_by_content($genome, $n_contigs, $n_nucs, $cksum);
151    
152            my $pegs = $genome_map{$genome};
153    
154          if ($my_genome)          if ($my_genome)
155          {          {
156              #              #
157              # Found a match.              # We do have such a local genome. Generate a peg_genome request to
158                # get the location information from the target side.
159                #
160                # Also remember the local genome mapping for this peg.
161              #              #
162              print "Genome $genome maps to $my_genome locally\n";  
163                print "$genome mapped to $my_genome\n";
164                for my $peg (@$pegs)
165                {
166                    push(@finalize_req, ['peg_genome', $peg]);
167                    $local_genome{$peg} = $my_genome;
168                }
169    
170          }          }
171          else          else
172          {          {
173              print "No mapping for $genome\n";              #
174                # We don't have such a genome. We need to retrieve the
175                # sequence data in order to finish mapping.
176                #
177                push(@finalize_req, map { ['peg_unknown', $_] } @$pegs);
178          }          }
179      }      }
180    
181        #
182        # If we need to finalize, make the call.
183        if (@finalize_req)
184        {
185            print Dumper(\@finalize_req);
186            $ret = $peer->finalize_pegs($session, \@finalize_req);
187    
188            if (!$ret or ref($ret) ne "ARRAY")
189            {
190                die "perform_update: finalize_pegs failed\n";
191  }  }
192    
193            #
194            # The return is a list of either location entries or
195            # sequence data. Attempt to finish up the mapping.
196            #
197    
198            my(%sought, %sought_seq);
199    
200    
201            my $dbh = $fig->db_handle();
202            for my $entry (@$ret)
203            {
204                my($what, $peg, @rest) = @$entry;
205    
206                if ($what eq "peg_loc")
207                {
208                    my($strand, $start, $end, $cksum, $seq) = @rest;
209    
210                    #
211                    # We have a contig location. Try to find a matching contig
212                    # here, and see if it maps to something.
213                    #
214    
215                    my $my_genome = $local_genome{$peg};
216                    my $local_contig = $fig->find_contig_with_checksum($my_genome, $cksum);
217                    if ($local_contig)
218                    {
219                        #
220                        # Now look up the local peg. We match on the end location; depending on the strand
221                        # the feature is on, we want to look at either minloc or maxloc.
222                        #
223    
224                        my $whichloc = $strand eq '-' ? "minloc" : "maxloc";
225    
226                        my $res = $dbh->SQL(qq!SELECT id from features
227                                               WHERE $whichloc = $end and genome = '$my_genome' and
228                                               contig = '$local_contig'
229                                            !);
230    
231                        if ($res and @$res > 0)
232                        {
233                            my(@ids) = map { $_->[0] } @$res;
234                            my $id = $ids[0];
235                            $peg_mapping{$peg} = $id;
236                            print "Mapped $peg to $id via contigs\n";
237                            if (@$res > 1)
238                            {
239                                warn "Multiple mappings found for $peg: @ids\n";
240                            }
241                        }
242                        else
243                        {
244                            print "failed: $peg  $my_genome and contig $local_contig start=$start end=$end strand=$strand\n";
245                            $sought{$peg}++;
246                            $sought_seq{$peg} = $seq;
247                        }
248                    }
249                    else
250                    {
251                        print "Mapping failed for $my_genome checksum $cksum\n";
252                        $sought{$peg}++;
253                        $sought_seq{$peg} = $seq;
254                    }
255                }
256                elsif ($what eq "peg_seq")
257                {
258                    my($seq) = @rest;
259    
260                    $sought{$peg}++;
261                    $sought_seq{$peg} = $seq;
262                }
263            }
264    
265            #
266            # Now see if we need to do a tough search.
267            #
268    
269            if (keys(%sought) > 0)
270            {
271                my %trans;
272    
273                print "Starting tough search\n";
274    
275                $fig->tough_search(undef, \%sought_seq, \%trans, \%sought);
276                print "Tough search translated: \n";
277                while (my($tpeg, $ttrans) = each(%trans))
278                {
279                    print "  $tpeg -> $ttrans\n";
280                    $peg_mapping{$tpeg} = $ttrans;
281                }
282            }
283        }
284    
285        #
286        # Retrieve the annotations, and generate a list of mapped annotations.
287        #
288    
289        my $annos = $peer->get_annotations($session, 0, $num_annos > 10 ? 10 : $num_annos);
290    
291        #
292        # Create a list of locally-mapped annotations on a per-genome
293        # basis.
294        #
295    
296        my %genome_annos;
297    
298        for my $anno (@$annos)
299        {
300            my($his_id, $ts, $author, $anno) = @$anno;
301    
302            my $my_id = $peg_mapping{$his_id};
303            next unless $my_id;
304    
305            my $genome = $fig->genome_of($my_id);
306    
307            push(@{$genome_annos{$genome}}, [$my_id, $ts, $author, $anno]);
308        }
309    
310        print Dumper(\%genome_annos);
311    
312        #
313        # Now install annotations.
314        #
315    
316        for my $genome (keys(%genome_annos))
317        {
318            # _install_genome_annos($fig, $genome, $genome_annos{$genome});
319        }
320    }
321    
322    
323    
324  #############  #############
325  #  #
# Line 290  Line 460 
460  use strict;  use strict;
461    
462  use Data::Dumper;  use Data::Dumper;
463    use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
464    
465  use SOAP::Lite;  use SOAP::Lite;
466    
467    #use SOAP::Lite +trace => [qw(transport dispatch result debug)];
468  use P2P;  use P2P;
469    
470  #  #
# Line 303  Line 476 
476  {  {
477      my($class, $fig, $url, $peer_id, $relay) = @_;      my($class, $fig, $url, $peer_id, $relay) = @_;
478    
479      my $proxy = SOAP::Lite->uri($ns_p2p)->proxy($url);      my $proxy = SOAP::Lite->uri($ns_p2p)->proxy($url, timeout => 3600);
480    
481      my $self = {      my $self = {
482          fig => $fig,          fig => $fig,
# Line 331  Line 504 
504  {  {
505      my($self, $last_update) = @_;      my($self, $last_update) = @_;
506    
507      my $rel = $self->{fig}->get_release_info();      my $rel = [$self->{fig}->get_release_info()];
508    
509      if (!defined($last_update))      if (!defined($last_update))
510      {      {
511          $last_update = $self->{fig}->get_peer_last_update($self->{peer_id});          $last_update = $self->{fig}->get_peer_last_update($self->{peer_id});
512      }      }
513    
514        print "Requesting update via $self->{proxy}\n";
515      my $reply = $self->{proxy}->request_update($rel, $last_update);      my $reply = $self->{proxy}->request_update($rel, $last_update);
516        print "Got reply ", Dumper($reply);
517    
518      if ($self->{relay})      if ($self->{relay})
519      {      {
# Line 368  Line 543 
543      return $self->call("get_pegs", $session_id, $start, $length);      return $self->call("get_pegs", $session_id, $start, $length);
544  }  }
545    
546    sub finalize_pegs
547    {
548        my($self, $session_id, $request) = @_;
549    
550        return $self->call("finalize_pegs", $session_id, $request);
551    }
552    
553    sub get_annotations
554    {
555        my($self, $session_id, $start, $length) = @_;
556    
557        return $self->call("get_annotations", $session_id, $start, $length);
558    }
559    
560  sub call  sub call
561  {  {
562      my($self, $func, @args) = @_;      my($self, $func, @args) = @_;
563    
564        my $t0 = [gettimeofday()];
565        print "Calling $func\n";
566      my $reply = $self->{proxy}->$func(@args);      my $reply = $self->{proxy}->$func(@args);
567        my $t1 = [gettimeofday()];
568    
569        my $elap = tv_interval($t0, $t1);
570        print "Call to $func took $elap\n";
571    
572      if ($self->{relay})      if ($self->{relay})
573      {      {
# Line 448  Line 643 
643    
644      my %pegs;      my %pegs;
645    
646        #
647        # We keep track of usernames that have been seen, so that
648        # we can both update our local user database and
649        # we can report them to our peer.
650        #
651    
652        my %users;
653    
654      my $num_annos = 0;      my $num_annos = 0;
655      my $num_genomes = 0;      my $num_genomes = 0;
656      my $num_pegs = 0;      my $num_pegs = 0;
657        my $num_assignments = 0;
658    
659      my $anno_fh;      my $anno_fh;
660      open($anno_fh, ">$spool_dir/annos");      open($anno_fh, ">$spool_dir/annos");
# Line 461  Line 665 
665      my $genome_fh;      my $genome_fh;
666      open($genome_fh, ">$spool_dir/genomes");      open($genome_fh, ">$spool_dir/genomes");
667    
668        my $assign_fh;
669        open($assign_fh, ">$spool_dir/assignments");
670    
671      for my $genome (@$all_genomes)      for my $genome (@$all_genomes)
672      {      {
673          my $num_annos_for_genome = 0;          my $num_annos_for_genome = 0;
674            my %assignment;
675    
676          my $genome_dir = "$FIG_Config::organisms/$genome";          my $genome_dir = "$FIG_Config::organisms/$genome";
677          next unless -d $genome_dir;          next unless -d $genome_dir;
# Line 484  Line 692 
692    
693                  {                  {
694                      #                      #
695                        # Update users list.
696                        #
697    
698                        $users{$who}++;
699    
700                        #
701                      # Look up aliases if we haven't seen this fid before.                      # Look up aliases if we haven't seen this fid before.
702                      #                      #
703    
# Line 501  Line 715 
715    
716                      $num_annos_for_genome++;                      $num_annos_for_genome++;
717                      $num_annos++;                      $num_annos++;
718    
719                        #
720                        # While we're here, see if this is an assignment. We check in the
721                        # %assignment hash, which is keyed on fid, to see if we already
722                        # saw an assignment for this fid. If we have, we keep this one only if
723                        # the assignment time on it is later than the one we saw already.
724                        #
725                        # We are only looking at master assignments for now. We will need
726                        # to return to this issue and reexamine it, but in order to move
727                        # forward I am only matching master assignments.
728                        #
729    
730                        if ($anno_text =~ /Set master function to\n(\S[^\n]+\S)/)
731                        {
732                            my $func = $1;
733    
734                            my $other = $assignment{$fid};
735    
736                            #
737                            # If we haven't seen an assignment for this fid,
738                            # or if it the other assignment has a timestamp that
739                            # is earlier than this one, set the assignment.
740                            #
741    
742                            if (!defined($other) or
743                                ($other->[1] < $anno_time))
744                            {
745                                $assignment{$fid} = [$fid, $anno_time, $who, $func];
746                            }
747                        }
748                  }                  }
749              }              }
750              close($afh);              close($afh);
751    
752                #
753                # Write out the assignments that remain.
754                #
755    
756                for my $fid (sort keys(%assignment))
757                {
758                    print $assign_fh join("\t", @{$assignment{$fid}}), "\n";
759                    $num_assignments++;
760                }
761          }          }
762    
763    
764          #          #
765          # Determine genome information if we have annotations for this one.          # Determine genome information if we have annotations for this one.
766          #          #
# Line 537  Line 792 
792      close($anno_fh);      close($anno_fh);
793      close($peg_fh);      close($peg_fh);
794      close($genome_fh);      close($genome_fh);
795        close($assign_fh);
796    
797      print "Pegs: $num_pegs\n";      print "Pegs: $num_pegs\n";
798      print "Genomes: $num_genomes\n";      print "Genomes: $num_genomes\n";
# Line 546  Line 802 
802      # Check compatibility.      # Check compatibility.
803      #      #
804    
805      my $my_release = $fig->get_release_info();      my $my_release = [$fig->get_release_info()];
806      my $compatible = (defined($my_release) && ($my_release == $his_release)) ? 1 : 0;  
807        #
808        # Release id is $my_release->[1].
809        #
810    
811        my $compatible;
812        if ($my_release->[1] ne "" and $his_release->[1] ne "")
813        {
814            #
815            # Both releases must be defined for them to be compatible.
816            #
817            # At some point we need to consider the derived-release issue.
818            #
819    
820            $compatible = $my_release->[1] eq $his_release->[1];
821        }
822        else
823        {
824            $compatible = 0;
825        }
826    
827      open(my $fh, ">$spool_dir/INFO");      open(my $fh, ">$spool_dir/INFO");
828      print $fh "requestor_release\t$his_release\n";      print $fh "requestor_release\t$his_release\n";
# Line 558  Line 833 
833      print $fh "num_pegs\t$num_pegs\n";      print $fh "num_pegs\t$num_pegs\n";
834      print $fh "num_genomes\t$num_genomes\n";      print $fh "num_genomes\t$num_genomes\n";
835      print $fh "num_annos\t$num_annos\n";      print $fh "num_annos\t$num_annos\n";
836        print $fh "num_assignments\t$num_assignments\n";
837      close($fh);      close($fh);
838    
839      return [$session_id, $my_release, $num_annos, $num_pegs, $num_genomes, $now, $compatible];      #
840        # Construct list of users, and pdate local user database.
841        #
842    
843        my @users = keys(%users);
844        # $fig->ensure_users(\@users);
845    
846        return [$session_id, $my_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,
847                $now, $compatible, \@users];
848  }  }
849    
850    
# Line 673  Line 957 
957    
958      return [$peg_output, $genome_output];      return [$peg_output, $genome_output];
959  }  }
960    
961    sub finalize_pegs
962    {
963        my($self, $session, $request) = @_;
964        my($out);
965    
966        my $fig = new FIG;
967    
968        #
969        # Walk the request handling appropriately. This is fairly easy, as it
970        # is just a matter of pulling either sequence or location/contig data.
971        #
972    
973        for my $item (@$request)
974        {
975            my($what, $peg) = @$item;
976    
977            if ($what eq "peg_genome")
978            {
979                #
980                # Return the location and contig checksum for this peg.
981                #
982                # We also include the sequence in case the contig mapping doesn't work.
983                #
984    
985                my $loc = $fig->feature_location($peg);
986                my $contig = $fig->contig_of($loc);
987                my $cksum = $fig->contig_checksum($fig->genome_of($peg), $contig);
988                my $seq = $fig->get_translation($peg);
989    
990                push(@$out, ['peg_loc', $peg,
991                            $fig->strand_of($peg),
992                            $fig->beg_of($loc), $fig->end_of($loc),
993                            $cksum, $seq]);
994    
995            }
996            elsif ($what eq "peg_unknown")
997            {
998                my $seq = $fig->get_translation($peg);
999                push(@$out, ['peg_seq', $peg, $seq]);
1000            }
1001        }
1002        return $out;
1003    }
1004    
1005    
1006    sub get_annotations
1007    {
1008        my($self, $session_id, $start, $len) = @_;
1009    
1010        #
1011        # This is now easy; just run thru the saved annotations and return.
1012        #
1013    
1014        my(%session_info);
1015    
1016        my $spool_dir = "$FIG_Config::temp/p2p_spool/$session_id";
1017    
1018        -d $spool_dir or die "Invalid session id $session_id";
1019    
1020        #
1021        # Read in the cached information for this session.
1022        #
1023    
1024        open(my $info_fh, "<$spool_dir/INFO") or die "Cannot open INFO file: $!";
1025        while (<$info_fh>)
1026        {
1027            chomp;
1028            my($var, $val) = split(/\t/, $_, 2);
1029            $session_info{$var} = $val;
1030        }
1031        close($info_fh);
1032    
1033        #
1034        # Sanity check start and length.
1035        #
1036    
1037        if ($start < 0 or $start >= $session_info{num_annos})
1038        {
1039            die "Invalid start position $start";
1040        }
1041    
1042        if ($len < 0 or ($start + $len - 1) >= $session_info{num_annos})
1043        {
1044            die "Invalid length $len";
1045        }
1046    
1047        #
1048        # Open file, spin to the starting line, then start reading.
1049        #
1050    
1051        open(my $anno_fh, "<$spool_dir/annos") or die "Cannot open annos file: $!";
1052    
1053        my $anno_output = [];
1054    
1055        my $anno_num = 0;
1056    
1057        local $/ = "//\n";
1058        while (<$anno_fh>)
1059        {
1060            next if ($anno_num < $start);
1061    
1062            last if ($anno_num > ($start + $len));
1063    
1064            chomp;
1065    
1066            my($id, $date, $author, $anno) = split(/\n/, $_, 4);
1067    
1068            push(@$anno_output, [$id, $date, $author, $anno]);
1069        }
1070        continue
1071        {
1072            $anno_num++;
1073        }
1074    
1075        return $anno_output;
1076    }

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.18

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3