[Bio] / FigKernelPackages / P2P.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/P2P.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Wed Sep 22 19:29:51 2004 UTC revision 1.16, Mon Oct 4 15:57:15 2004 UTC
# Line 22  Line 22 
22  use Exporter;  use Exporter;
23  use base qw(Exporter);  use base qw(Exporter);
24    
25    use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
26    
27  use Data::Dumper;  use Data::Dumper;
28    
29  use vars qw(@EXPORT @EXPORT_OK);  use vars qw(@EXPORT @EXPORT_OK);
# Line 56  Line 58 
58          die "perform_update: request_updated failed\n";          die "perform_update: request_updated failed\n";
59      }      }
60    
61      my($session, $target_release, $num_annos, $num_pegs, $num_genomes,      my($session, $target_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,
62         $target_time, $compatible) = @$ret;         $target_time, $compatible) = @$ret;
63    
64      print "perform_update: session=$session target=$target_release num_annos=$num_annos\n";      print "perform_update: session=$session target=$target_release num_annos=$num_annos\n";
# Line 90  Line 92 
92              #              #
93              # Peg id is directly usable.              # Peg id is directly usable.
94              #              #
95                $peg_mapping{$peg} = $peg;
96          }          }
97          elsif ($key eq 'peg_info')          elsif ($key eq 'peg_info')
98          {          {
# Line 102  Line 105 
105              for my $alias (@$alias_list)              for my $alias (@$alias_list)
106              {              {
107                  my $mapped = $fig->by_alias($alias);                  my $mapped = $fig->by_alias($alias);
108                  if ($mapped && $peg !~ /5$/)                  if ($mapped)
109                  {                  {
110                      print "$peg maps to $mapped via $alias\n";                      print "$peg maps to $mapped via $alias\n";
111                      $peg_mapping{$peg}= $mapped;                      $peg_mapping{$peg}= $mapped;
# Line 119  Line 122 
122              if (!defined($peg_mapping{$peg}))              if (!defined($peg_mapping{$peg}))
123              {              {
124                  push(@{$genome_map{$genome_id}}, $peg);                  push(@{$genome_map{$genome_id}}, $peg);
125                    print "$peg did not map\n";
126              }              }
127          }          }
128      }      }
# Line 126  Line 130 
130      #      #
131      # finished first pass. Now go over the per-genome mappings that need to be made.      # finished first pass. Now go over the per-genome mappings that need to be made.
132      #      #
133        # $genome_map{$genome_id} is a list of pegs that reside on that genome.
134        # the pegs and genome id are both target-based identifiers.
135        #
136    
137        my @finalize_req = ();
138        my %local_genome;
139    
140      for my $genome_info (@$genome_list)      for my $genome_info (@$genome_list)
141      {      {
142          my($genome, $n_contigs, $n_nucs, $cksum) = @$genome_info;          my($genome, $n_contigs, $n_nucs, $cksum) = @$genome_info;
143    
144          next unless $genome_map{$genome};          next unless defined($genome_map{$genome});
145    
146            #
147            # Determine if we have a local genome installed that matches precisely the
148            # genome on the target side.
149            #
150          my $my_genome = $fig->find_genome_by_content($genome, $n_contigs, $n_nucs, $cksum);          my $my_genome = $fig->find_genome_by_content($genome, $n_contigs, $n_nucs, $cksum);
151    
152            my $pegs = $genome_map{$genome};
153    
154          if ($my_genome)          if ($my_genome)
155          {          {
156              #              #
157              # Found a match.              # We do have such a local genome. Generate a peg_genome request to
158                # get the location information from the target side.
159                #
160                # Also remember the local genome mapping for this peg.
161                #
162    
163                print "$genome mapped to $my_genome\n";
164                for my $peg (@$pegs)
165                {
166                    push(@finalize_req, ['peg_genome', $peg]);
167                    $local_genome{$peg} = $my_genome;
168                }
169    
170            }
171            else
172            {
173                #
174                # We don't have such a genome. We need to retrieve the
175                # sequence data in order to finish mapping.
176                #
177                push(@finalize_req, map { ['peg_unknown', $_] } @$pegs);
178            }
179        }
180    
181        #
182        # If we need to finalize, make the call.
183        if (@finalize_req)
184        {
185            print Dumper(\@finalize_req);
186            $ret = $peer->finalize_pegs($session, \@finalize_req);
187    
188            if (!$ret or ref($ret) ne "ARRAY")
189            {
190                die "perform_update: finalize_pegs failed\n";
191            }
192    
193            #
194            # The return is a list of either location entries or
195            # sequence data. Attempt to finish up the mapping.
196            #
197    
198            my(%sought, %sought_seq);
199    
200    
201            my $dbh = $fig->db_handle();
202            for my $entry (@$ret)
203            {
204                my($what, $peg, @rest) = @$entry;
205    
206                if ($what eq "peg_loc")
207                {
208                    my($strand, $start, $end, $cksum, $seq) = @rest;
209    
210                    #
211                    # We have a contig location. Try to find a matching contig
212                    # here, and see if it maps to something.
213                    #
214    
215                    my $my_genome = $local_genome{$peg};
216                    my $local_contig = $fig->find_contig_with_checksum($my_genome, $cksum);
217                    if ($local_contig)
218                    {
219                        #
220                        # Now look up the local peg. We match on the end location; depending on the strand
221                        # the feature is on, we want to look at either minloc or maxloc.
222                        #
223    
224                        my $whichloc = $strand eq '-' ? "minloc" : "maxloc";
225    
226                        my $res = $dbh->SQL(qq!SELECT id from features
227                                               WHERE $whichloc = $end and genome = '$my_genome' and
228                                               contig = '$local_contig'
229                                            !);
230    
231                        if ($res and @$res > 0)
232                        {
233                            my(@ids) = map { $_->[0] } @$res;
234                            my $id = $ids[0];
235                            $peg_mapping{$peg} = $id;
236                            print "Mapped $peg to $id via contigs\n";
237                            if (@$res > 1)
238                            {
239                                warn "Multiple mappings found for $peg: @ids\n";
240                            }
241                        }
242                        else
243                        {
244                            print "failed: $peg  $my_genome and contig $local_contig start=$start end=$end strand=$strand\n";
245                            $sought{$peg}++;
246                            $sought_seq{$peg} = $seq;
247                        }
248                    }
249                    else
250                    {
251                        print "Mapping failed for $my_genome checksum $cksum\n";
252                        $sought{$peg}++;
253                        $sought_seq{$peg} = $seq;
254                    }
255                }
256                elsif ($what eq "peg_seq")
257                {
258                    my($seq) = @rest;
259    
260                    $sought{$peg}++;
261                    $sought_seq{$peg} = $seq;
262                }
263            }
264    
265            #
266            # Now see if we need to do a tough search.
267              #              #
             print "Genome $genome maps to $my_genome locally\n";  
268    
269            if (keys(%sought) > 0)
270            {
271                my %trans;
272    
273                print "Starting tough search\n";
274    
275                $fig->tough_search(undef, \%sought_seq, \%trans, \%sought);
276                print "Tough search translated: \n";
277                while (my($tpeg, $ttrans) = each(%trans))
278                {
279                    print "  $tpeg -> $ttrans\n";
280                    $peg_mapping{$tpeg} = $ttrans;
281                }
282            }
283        }
284    
285        #
286        # Retrieve the annotations, and generate a list of mapped annotations.
287        #
288    
289        my $annos = $peer->get_annotations($session, 0, $num_annos > 10 ? 10 : $num_annos);
290    
291        #
292        # Create a list of locally-mapped annotations on a per-genome
293        # basis.
294        #
295    
296        my %genome_annos;
297    
298        for my $anno (@$annos)
299        {
300            my($his_id, $ts, $author, $anno) = @$anno;
301    
302            my $my_id = $peg_mapping{$his_id};
303            next unless $my_id;
304    
305            my $genome = $fig->genome_of($my_id);
306    
307            push(@{$genome_annos{$genome}}, [$my_id, $ts, $author, $anno]);
308          }          }
309    
310        print Dumper(\%genome_annos);
311    
312        #
313        # Now install annotations.
314        #
315    
316        for my $genome (keys(%genome_annos))
317        {
318            # _install_genome_annos($fig, $genome, $genome_annos{$genome});
319      }      }
320  }  }
321    
322    
323    
324  #############  #############
325  #  #
326  # P2P Relay  # P2P Relay
# Line 286  Line 460 
460  use strict;  use strict;
461    
462  use Data::Dumper;  use Data::Dumper;
463    use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
464    
465  use SOAP::Lite;  use SOAP::Lite;
466    
467    #use SOAP::Lite +trace => [qw(transport dispatch result debug)];
468  use P2P;  use P2P;
469    
470  #  #
# Line 364  Line 541 
541      return $self->call("get_pegs", $session_id, $start, $length);      return $self->call("get_pegs", $session_id, $start, $length);
542  }  }
543    
544    sub finalize_pegs
545    {
546        my($self, $session_id, $request) = @_;
547    
548        return $self->call("finalize_pegs", $session_id, $request);
549    }
550    
551    sub get_annotations
552    {
553        my($self, $session_id, $start, $length) = @_;
554    
555        return $self->call("get_annotations", $session_id, $start, $length);
556    }
557    
558  sub call  sub call
559  {  {
560      my($self, $func, @args) = @_;      my($self, $func, @args) = @_;
561    
562        my $t0 = [gettimeofday()];
563        print "Calling $func\n";
564      my $reply = $self->{proxy}->$func(@args);      my $reply = $self->{proxy}->$func(@args);
565        my $t1 = [gettimeofday()];
566    
567        my $elap = tv_interval($t0, $t1);
568        print "Call to $func took $elap\n";
569    
570      if ($self->{relay})      if ($self->{relay})
571      {      {
# Line 444  Line 641 
641    
642      my %pegs;      my %pegs;
643    
644        #
645        # We keep track of usernames that have been seen, so that
646        # we can both update our local user database and
647        # we can report them to our peer.
648        #
649    
650        my %users;
651    
652      my $num_annos = 0;      my $num_annos = 0;
653      my $num_genomes = 0;      my $num_genomes = 0;
654      my $num_pegs = 0;      my $num_pegs = 0;
655        my $num_assignments = 0;
656    
657      my $anno_fh;      my $anno_fh;
658      open($anno_fh, ">$spool_dir/annos");      open($anno_fh, ">$spool_dir/annos");
# Line 457  Line 663 
663      my $genome_fh;      my $genome_fh;
664      open($genome_fh, ">$spool_dir/genomes");      open($genome_fh, ">$spool_dir/genomes");
665    
666        my $assign_fh;
667        open($assign_fh, ">$spool_dir/assignments");
668    
669      for my $genome (@$all_genomes)      for my $genome (@$all_genomes)
670      {      {
671          my $num_annos_for_genome = 0;          my $num_annos_for_genome = 0;
672            my %assignment;
673    
674          my $genome_dir = "$FIG_Config::organisms/$genome";          my $genome_dir = "$FIG_Config::organisms/$genome";
675          next unless -d $genome_dir;          next unless -d $genome_dir;
# Line 480  Line 690 
690    
691                  {                  {
692                      #                      #
693                        # Update users list.
694                        #
695    
696                        $users{$who}++;
697    
698                        #
699                      # Look up aliases if we haven't seen this fid before.                      # Look up aliases if we haven't seen this fid before.
700                      #                      #
701    
# Line 497  Line 713 
713    
714                      $num_annos_for_genome++;                      $num_annos_for_genome++;
715                      $num_annos++;                      $num_annos++;
716    
717                        #
718                        # While we're here, see if this is an assignment. We check in the
719                        # %assignment hash, which is keyed on fid, to see if we already
720                        # saw an assignment for this fid. If we have, we keep this one only if
721                        # the assignment time on it is later than the one we saw already.
722                        #
723                        # We are only looking at master assignments for now. We will need
724                        # to return to this issue and reexamine it, but in order to move
725                        # forward I am only matching master assignments.
726                        #
727    
728                        if ($anno_text =~ /Set master function to\n(\S[^\n]+\S)/)
729                        {
730                            my $func = $1;
731    
732                            my $other = $assignment{$fid};
733    
734                            #
735                            # If we haven't seen an assignment for this fid,
736                            # or if it the other assignment has a timestamp that
737                            # is earlier than this one, set the assignment.
738                            #
739    
740                            if (!defined($other) or
741                                ($other->[1] < $anno_time))
742                            {
743                                $assignment{$fid} = [$fid, $anno_time, $who, $func];
744                            }
745                        }
746                  }                  }
747              }              }
748              close($afh);              close($afh);
749    
750                #
751                # Write out the assignments that remain.
752                #
753    
754                for my $fid (sort keys(%assignment))
755                {
756                    print $assign_fh join("\t", @{$assignment{$fid}}), "\n";
757                    $num_assignments++;
758          }          }
759            }
760    
761    
762          #          #
763          # Determine genome information if we have annotations for this one.          # Determine genome information if we have annotations for this one.
# Line 533  Line 790 
790      close($anno_fh);      close($anno_fh);
791      close($peg_fh);      close($peg_fh);
792      close($genome_fh);      close($genome_fh);
793        close($assign_fh);
794    
795      print "Pegs: $num_pegs\n";      print "Pegs: $num_pegs\n";
796      print "Genomes: $num_genomes\n";      print "Genomes: $num_genomes\n";
# Line 554  Line 812 
812      print $fh "num_pegs\t$num_pegs\n";      print $fh "num_pegs\t$num_pegs\n";
813      print $fh "num_genomes\t$num_genomes\n";      print $fh "num_genomes\t$num_genomes\n";
814      print $fh "num_annos\t$num_annos\n";      print $fh "num_annos\t$num_annos\n";
815        print $fh "num_assignments\t$num_assignments\n";
816      close($fh);      close($fh);
817    
818      return [$session_id, $my_release, $num_annos, $num_pegs, $num_genomes, $now, $compatible];      #
819        # Construct list of users, and pdate local user database.
820        #
821    
822        my @users = keys(%users);
823        $fig->ensure_users(\@users);
824    
825        return [$session_id, $my_release, $num_assignments, $num_annos, $num_pegs, $num_genomes,
826                $now, $compatible, \@users];
827  }  }
828    
829    
# Line 669  Line 936 
936    
937      return [$peg_output, $genome_output];      return [$peg_output, $genome_output];
938  }  }
939    
940    sub finalize_pegs
941    {
942        my($self, $session, $request) = @_;
943        my($out);
944    
945        my $fig = new FIG;
946    
947        #
948        # Walk the request handling appropriately. This is fairly easy, as it
949        # is just a matter of pulling either sequence or location/contig data.
950        #
951    
952        for my $item (@$request)
953        {
954            my($what, $peg) = @$item;
955    
956            if ($what eq "peg_genome")
957            {
958                #
959                # Return the location and contig checksum for this peg.
960                #
961                # We also include the sequence in case the contig mapping doesn't work.
962                #
963    
964                my $loc = $fig->feature_location($peg);
965                my $contig = $fig->contig_of($loc);
966                my $cksum = $fig->contig_checksum($fig->genome_of($peg), $contig);
967                my $seq = $fig->get_translation($peg);
968    
969                push(@$out, ['peg_loc', $peg,
970                            $fig->strand_of($peg),
971                            $fig->beg_of($loc), $fig->end_of($loc),
972                            $cksum, $seq]);
973    
974            }
975            elsif ($what eq "peg_unknown")
976            {
977                my $seq = $fig->get_translation($peg);
978                push(@$out, ['peg_seq', $peg, $seq]);
979            }
980        }
981        return $out;
982    }
983    
984    
985    sub get_annotations
986    {
987        my($self, $session_id, $start, $len) = @_;
988    
989        #
990        # This is now easy; just run thru the saved annotations and return.
991        #
992    
993        my(%session_info);
994    
995        my $spool_dir = "$FIG_Config::temp/p2p_spool/$session_id";
996    
997        -d $spool_dir or die "Invalid session id $session_id";
998    
999        #
1000        # Read in the cached information for this session.
1001        #
1002    
1003        open(my $info_fh, "<$spool_dir/INFO") or die "Cannot open INFO file: $!";
1004        while (<$info_fh>)
1005        {
1006            chomp;
1007            my($var, $val) = split(/\t/, $_, 2);
1008            $session_info{$var} = $val;
1009        }
1010        close($info_fh);
1011    
1012        #
1013        # Sanity check start and length.
1014        #
1015    
1016        if ($start < 0 or $start >= $session_info{num_annos})
1017        {
1018            die "Invalid start position $start";
1019        }
1020    
1021        if ($len < 0 or ($start + $len - 1) >= $session_info{num_annos})
1022        {
1023            die "Invalid length $len";
1024        }
1025    
1026        #
1027        # Open file, spin to the starting line, then start reading.
1028        #
1029    
1030        open(my $anno_fh, "<$spool_dir/annos") or die "Cannot open annos file: $!";
1031    
1032        my $anno_output = [];
1033    
1034        my $anno_num = 0;
1035    
1036        local $/ = "//\n";
1037        while (<$anno_fh>)
1038        {
1039            next if ($anno_num < $start);
1040    
1041            last if ($anno_num > ($start + $len));
1042    
1043            chomp;
1044    
1045            my($id, $date, $author, $anno) = split(/\n/, $_, 4);
1046    
1047            push(@$anno_output, [$id, $date, $author, $anno]);
1048        }
1049        continue
1050        {
1051            $anno_num++;
1052        }
1053    
1054        return $anno_output;
1055    }

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.16

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3