4463 |
return 0; |
return 0; |
4464 |
} |
} |
4465 |
|
|
4466 |
|
################################# PEG Translation #################################### |
4467 |
|
|
4468 |
|
sub translate_pegs { |
4469 |
|
my($self,$pegs,$seq_of) = @_; |
4470 |
|
my($seq,$aliases,$pegT,%to,%sought,@keys,$peg,$alias); |
4471 |
|
|
4472 |
|
my $tran_peg = {}; |
4473 |
|
|
4474 |
|
foreach $peg (keys(%$pegs)) |
4475 |
|
{ |
4476 |
|
if (($seq = $self->get_translation($peg)) && |
4477 |
|
(length($seq) > 10) && (length($seq_of->{$peg}) > 10) && |
4478 |
|
(uc substr($seq,-10) eq substr($seq_of->{$peg},-10))) |
4479 |
|
{ |
4480 |
|
$tran_peg->{$peg} = $peg; |
4481 |
|
} |
4482 |
|
else |
4483 |
|
{ |
4484 |
|
($aliases,undef,undef) = @{$pegs->{$peg}}; |
4485 |
|
undef %to; |
4486 |
|
foreach $alias (split(/,/,$aliases)) |
4487 |
|
{ |
4488 |
|
if ($pegT = $self->by_alias($alias)) |
4489 |
|
{ |
4490 |
|
$to{$pegT}++; |
4491 |
|
} |
4492 |
|
} |
4493 |
|
|
4494 |
|
if ((@keys = keys(%to)) == 1) |
4495 |
|
{ |
4496 |
|
$tran_peg->{$peg} = $keys[0]; |
4497 |
|
} |
4498 |
|
else |
4499 |
|
{ |
4500 |
|
$sought{$peg} = 1; |
4501 |
|
} |
4502 |
|
} |
4503 |
|
} |
4504 |
|
|
4505 |
|
if ((scalar keys(%sought)) > 0) |
4506 |
|
{ |
4507 |
|
&tough_search($self,$pegs,$seq_of,$tran_peg,\%sought); |
4508 |
|
} |
4509 |
|
return $tran_peg; |
4510 |
|
} |
4511 |
|
|
4512 |
|
sub tough_search { |
4513 |
|
my($self,$pegs,$seq_of,$tran_peg,$sought) = @_; |
4514 |
|
my($peg,$seq,%needH,%needT,%poss,$id,$sub,$to,$x,$genome); |
4515 |
|
|
4516 |
|
foreach $peg (keys(%$sought)) |
4517 |
|
{ |
4518 |
|
if (($seq = $seq_of->{$peg}) && (length($seq) > 50)) |
4519 |
|
{ |
4520 |
|
my $sub = substr($seq,-50); |
4521 |
|
push(@{$needT{$sub}},$peg); |
4522 |
|
$sub = substr($seq,0,50); |
4523 |
|
push(@{$needH{$sub}},$peg); |
4524 |
|
} |
4525 |
|
} |
4526 |
|
# print STDERR &Dumper(\%needT,\%needH); |
4527 |
|
open(NR,"<$FIG_Config::global/nr") |
4528 |
|
|| die "could not open $FIG_Config::global/nr"; |
4529 |
|
$/ = "\n>"; |
4530 |
|
while (defined($_ = <NR>)) |
4531 |
|
{ |
4532 |
|
chomp; |
4533 |
|
if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s) |
4534 |
|
{ |
4535 |
|
$id = $1; |
4536 |
|
$seq = $2; |
4537 |
|
if (length($seq) >= 50) |
4538 |
|
{ |
4539 |
|
$sub = uc substr($seq,-50); |
4540 |
|
if ((($x = $needT{uc substr($seq,-50)}) && (@$x == 1)) || |
4541 |
|
(($x = $needH{uc substr($seq,0,50)}) && (@$x == 1))) |
4542 |
|
{ |
4543 |
|
$peg = $x->[0]; |
4544 |
|
my @same = grep { $_ =~ /^fig/ } map { $_->[0] } $self->mapped_prot_ids($id); |
4545 |
|
if (@same > 0) |
4546 |
|
{ |
4547 |
|
push(@{$poss{$peg}},@same); |
4548 |
|
} |
4549 |
|
} |
4550 |
|
} |
4551 |
|
} |
4552 |
|
} |
4553 |
|
close(NR); |
4554 |
|
$/ = "\n"; |
4555 |
|
|
4556 |
|
# print STDERR &Dumper(\%poss); |
4557 |
|
foreach $peg (keys(%poss)) |
4558 |
|
{ |
4559 |
|
# print STDERR "processing $peg\n"; |
4560 |
|
$x = $poss{$peg}; |
4561 |
|
if (@$x == 1) |
4562 |
|
{ |
4563 |
|
$tran_peg->{$peg} = $x->[0]; |
4564 |
|
delete $sought->{$peg}; |
4565 |
|
} |
4566 |
|
elsif ($genome = $self->probable_genome($self->genome_of($peg),$tran_peg)) |
4567 |
|
{ |
4568 |
|
# print STDERR " mapped to genome $genome\n"; |
4569 |
|
my $genomeQ = quotemeta $genome; |
4570 |
|
my @tmp = grep { $_ =~ /^fig\|$genomeQ\./ } @$x; |
4571 |
|
if (@tmp == 1) |
4572 |
|
{ |
4573 |
|
$tran_peg->{$peg} = $tmp[0]; |
4574 |
|
delete $sought->{$peg}; |
4575 |
|
} |
4576 |
|
else |
4577 |
|
{ |
4578 |
|
# print STDERR &Dumper(["failed",$peg,$genome,\@tmp]); |
4579 |
|
} |
4580 |
|
} |
4581 |
|
else |
4582 |
|
{ |
4583 |
|
# print STDERR "could not place genome for $peg\n"; |
4584 |
|
} |
4585 |
|
} |
4586 |
|
|
4587 |
|
foreach $peg (keys(%$sought)) |
4588 |
|
{ |
4589 |
|
print STDERR "failed to map $peg\n"; |
4590 |
|
} |
4591 |
|
} |
4592 |
|
|
4593 |
|
sub probable_genome { |
4594 |
|
my($self,$genome,$tran_peg) = @_; |
4595 |
|
my($peg,%poss,@poss); |
4596 |
|
|
4597 |
|
my $genomeQ = quotemeta $genome; |
4598 |
|
foreach $peg (grep { $_ =~ /^fig\|$genomeQ\./ } keys(%$tran_peg)) |
4599 |
|
{ |
4600 |
|
$poss{$self->genome_of($tran_peg->{$peg})}++; |
4601 |
|
} |
4602 |
|
@poss = sort { $poss{$b} <=> $poss{$a} } keys(%poss); |
4603 |
|
if ((@poss == 1) || ((@poss > 1) && ($poss{$poss[0]} > $poss{$poss[1]}))) |
4604 |
|
{ |
4605 |
|
return $poss[0]; |
4606 |
|
} |
4607 |
|
else |
4608 |
|
{ |
4609 |
|
print STDERR &Dumper(["could not resolve",\%poss,$genome]); |
4610 |
|
return undef; |
4611 |
|
} |
4612 |
|
} |
4613 |
|
|
4614 |
1 |
1 |