# candidates_for_role
#
# Collects the sequences already assigned to $role (as reported by
# seqs_with_role for the given $user), orders them by the value that
# crude_estimate_of_distance returns for ($genome, genome of the sequence),
# smallest first, and hands the ordered list to
# candidates_for_role_from_known, whose result is returned unchanged.
#
# Parameters:
#   $self   - the object the helper methods are invoked on
#   $role   - role passed through to seqs_with_role
#   $genome - genome the candidates are wanted for
#   $cutoff - passed through untouched to candidates_for_role_from_known
#   $user   - optional; any false value is replaced by "master"
#
# Returns: whatever candidates_for_role_from_known returns (calling context
#          is propagated to it).
#
# NOTE(review): the distance is assumed to be "smaller means closer", so the
# known sequences from the nearest genomes come first - confirm against
# crude_estimate_of_distance.
sub candidates_for_role {
    my($self,$role,$genome,$cutoff,$user) = @_;

    $user = $user ? $user : "master";

    # Decorate each sequence with its distance estimate, sort on that
    # estimate, then strip the decoration again.  Without the sort stage
    # the estimate would be computed for every sequence and thrown away.
    my @cand = map  { $_->[0] }
               sort { $a->[1] <=> $b->[1] }
               map  { [$_,$self->crude_estimate_of_distance($genome,&FIG::genome_of($_))] }
               $self->seqs_with_role($role,$user);

    return $self->candidates_for_role_from_known($genome,$cutoff,\@cand);
}
3939 |
|
|
3940 |
|
sub candidates_for_role_from_known { |
3941 |
|
my($self,$genome,$cutoff,$known) = @_; |
3942 |
|
my($peg); |
3943 |
|
|
3944 |
|
my @cand = @$known; |
3945 |
my $hits = {}; |
my $hits = {}; |
3946 |
my $seen = {}; |
my $seen = {}; |
3947 |
my $how_many = (@cand > 10) ? 10 : scalar @cand; |
my $how_many = (@cand > 10) ? 10 : scalar @cand; |
3998 |
} |
} |
3999 |
} |
} |
4000 |
|
|
# best_bbh_candidate
#
# Filters the candidates produced by candidates_for_role_from_known down to
# those that are a bidirectional best hit (BBH) with one of the known pegs.
#
# A candidate $peg is accepted when, among its similarities restricted to
# genomes that contain a known peg, the first hit seen for some genome is a
# known peg whose own first similarity back into $genome
#   (a) is $peg itself, and
#   (b) passes ok_match.
# Only the first hit per genome is examined for each candidate.
#
# Parameters:
#   $self      - the object the helper methods are invoked on
#   $genome    - genome the candidates are being sought in
#   $cutoff    - passed to sims and to candidates_for_role_from_known
#   $requested - maximum number of candidates to return
#   $known     - reference to an array of known pegs
#
# Returns: a list of at most $requested accepted candidate pegs, in the
#          order candidates_for_role_from_known produced them.
sub best_bbh_candidate {
    my($self,$genome,$cutoff,$requested,$known) = @_;

    my @got  = ();
    my @cand = $self->candidates_for_role_from_known($genome,$cutoff,$known);
    return @got if (@cand == 0);

    # Genomes that hold at least one known peg.  A private variable is used
    # here on purpose: assigning to $genome would replace the target genome
    # that the back-similarity scan below depends on.
    my %genomes = map { my $known_genome = &FIG::genome_of($_); ($known_genome => 1) } @$known;
    my %pegs    = map { $_ => 1 } @$known;

    # known peg => its first similarity back into $genome (0 when there is
    # none).  Shared by all candidates so each known peg is queried once.
    my %computed_sims;

    foreach my $peg (@cand)
    {
        last unless (@got < $requested);

        my @sims = grep { $genomes{&FIG::genome_of($_->id2)} }
                   $self->sims($peg,1000,$cutoff,"fig");

        my %seen;       # genomes whose first hit has already been examined
        my $bbh = 0;
        foreach my $sim (@sims)
        {
            my $id2     = $sim->id2;
            my $genome2 = &FIG::genome_of($id2);

            next if $seen{$genome2};
            $seen{$genome2} = 1;

            next unless $pegs{$id2};

            my $sim_back = $computed_sims{$id2};
            if (! defined($sim_back))
            {
                $sim_back = 0;
                foreach my $back ($self->sims($id2,1000,$cutoff,"fig"))
                {
                    if (&FIG::genome_of($back->id2) eq $genome)
                    {
                        $sim_back = $back;
                        last;
                    }
                }
                $computed_sims{$id2} = $sim_back;
            }

            # The cached back hit is the same for every candidate, so it
            # must be compared with this candidate to be bidirectional.
            if ($sim_back && ($sim_back->id2 eq $peg) && $self->ok_match($sim_back))
            {
                $bbh = 1;
                last;
            }
        }

        push(@got,$peg) if $bbh;
    }

    return @got;
}


# ok_match
#
# Decides whether a similarity covers enough of both sequences.
#
# For each side the covered fraction is taken as (end - begin) / length,
# using the similarity's b1/e1/ln1 and b2/e2/ln2 accessors, and both
# fractions must be at least 0.7.  Note that the span is end - begin with
# no "+ 1".
#
# Parameters:
#   $self - unused; present so the routine can be called as a method
#   $sim  - object providing ln1, ln2, b1, e1, b2 and e2 methods
#           (presumably sequence lengths and match begin/end coordinates -
#           TODO confirm against the similarity class)
#
# Returns: a true value when both fractions are >= 0.7, a false value
#          otherwise, including when either length is zero or undefined.
sub ok_match {
    my($self,$sim) = @_;

    my $ln1 = $sim->ln1;
    my $ln2 = $sim->ln2;

    # A missing or zero length cannot be divided by; treat it as "no match"
    # rather than dying with "Illegal division by zero".
    return 0 if ((! $ln1) || (! $ln2) || ($ln1 == 0) || ($ln2 == 0));

    my $covered1 = ($sim->e1 - $sim->b1) / $ln1;
    my $covered2 = ($sim->e2 - $sim->b2) / $ln2;

    return (($covered1 >= 0.7) && ($covered2 >= 0.7));
}

################################# DNA sequence Stuff ####################################

=pod