9 
use HTML; 
use HTML; 
10 
use Sprout; 
use Sprout; 
11 
use Time::HiRes; 
use Time::HiRes; 
12 

use FIGRules; 
13 


14 
our @ISA = qw(SearchHelper); 
our @ISA = qw(SearchHelper); 
15 


57 


58 
Maximum match difference for a BBH hit to be considered valid. The default is C<1e10>. 
Maximum match difference for a BBH hit to be considered valid. The default is C<1e10>. 
59 


60 

=item showMatch 
61 


62 

If TRUE, then all the genes in the target set that match the ones in the reference genome 
63 

will be shown in an extra column. 
64 


65 
=back 
=back 
66 


67 
=head2 Virtual Methods 
=head2 Virtual Methods 
94 
my $commonality = $cgi>param('commonality')  "0.8"; 
my $commonality = $cgi>param('commonality')  "0.8"; 
95 
my $cutoff = $cgi>param('cutoff')  "1e10"; 
my $cutoff = $cgi>param('cutoff')  "1e10"; 
96 
my $statistical = $cgi>param('statistical')  1; 
my $statistical = $cgi>param('statistical')  1; 
97 

my $showMatch = $cgi>param('showMatch')  0; 
98 
# Now we build the table rows. 
# Now we build the table rows. 
99 
my @rows = (); 
my @rows = (); 
100 
# First we have the given genome. 
# First we have the given genome. 
110 
$cgi>td($cgi>textfield(name => 'commonality', 
$cgi>td($cgi>textfield(name => 'commonality', 
111 
value => $commonality, 
value => $commonality, 
112 
size => 5))), 
size => 5))), 
113 
$cgi>Tr($cgi>td(), $cgi>td( 
$cgi>Tr($cgi>td(), $cgi>td(join(" ", 
114 
$cgi>checkbox(name => 'statistical', 
$cgi>checkbox(name => 'statistical', 
115 
checked => $statistical, 
checked => $statistical, 
116 
value => 1, 
value => 1, 
117 
label => 'Use Statistical Algorithm'))); 
label => 'Use Statistical Algorithm')), 
118 

$cgi>checkbox(name => 'showMatch', 
119 

checked => $showMatch, 
120 

value => 1, 
121 

label => 'Show Matching Genes'))); 
122 
push @rows, $cgi>Tr($cgi>td("Cutoff"), 
push @rows, $cgi>Tr($cgi>td("Cutoff"), 
123 
$cgi>td($cgi>textfield(name => 'cutoff', 
$cgi>td($cgi>textfield(name => 'cutoff', 
124 
value => $cutoff, 
value => $cutoff, 
155 
# Declare the return variable. If it remains undefined, the caller will 
# Declare the return variable. If it remains undefined, the caller will 
156 
# assume there was an error. 
# assume there was an error. 
157 
my $retVal; 
my $retVal; 
158 

# Denote the extra columns go at the end. 
159 

$self>SetExtraPos(1); 
160 
# Create the timers. 
# Create the timers. 
161 
my ($saveTime, $loopCounter, $bbhTimer, $putTimer, $queryTimer) = (0, 0, 0, 0, 0); 
my ($saveTime, $loopCounter, $bbhTimer, $putTimer, $queryTimer) = (0, 0, 0, 0, 0); 
162 
# Validate the numeric parameters. 
# Validate the numeric parameters. 
183 
$targetGenomes{$givenGenomeID} = 1 
$targetGenomes{$givenGenomeID} = 1 
184 
} 
} 
185 
# Find out if we want to use a statistical analysis. 
# Find out if we want to use a statistical analysis. 
186 
my $statistical = $cgi>param('statistical')  0; 
my $statistical = $cgi>param('statistical')  1; 
187 

# Find out if we need to show matching genes. 
188 

my $showMatch = $cgi>param('showMatch')  0; 
189 
# Denote we have not yet found any genomes. 
# Denote we have not yet found any genomes. 
190 
$retVal = 0; 
$retVal = 0; 
191 
# Compute the list of genomes of interest. 
# Compute the list of genomes of interest. 
221 
my $bbhList = $bbhMatrix{$fid}; 
my $bbhList = $bbhMatrix{$fid}; 
222 
# We next wish to loop through the BBH IDs, counting how many are in each of the 
# We next wish to loop through the BBH IDs, counting how many are in each of the 
223 
# sets. If a genome occurs twice, we only want to count the first occurrence, so 
# sets. If a genome occurs twice, we only want to count the first occurrence, so 
224 
# we have a hash of genomes we've already seen. 
# we have a hash of genomes we've already seen. The hash will map each gene ID 
225 

# to 0, 1, or 2, depending on whether it was found in the reference genome, 
226 

# a target genome, or an exclusion genome. 
227 
my %alreadySeen = (); 
my %alreadySeen = (); 
228 

# Save the matching genes in here. 
229 

my %genesMatching = (); 
230 
# Clear the exclusion count. 
# Clear the exclusion count. 
231 
my $exclusionCount = 0; 
my $exclusionCount = 0; 
232 
# Denote that we're in our own genome. 
# Denote that we're in our own genome. 
233 
$alreadySeen{$givenGenomeID} = 1; 
$alreadySeen{$givenGenomeID} = 0; 
234 
my $targetCount = 1; 
my $targetCount = 1; 
235 
# Loop through the BBHs. 
# Loop through the BBHs. 
236 
for my $bbhPeg (keys %{$bbhList}) { 
for my $bbhPeg (keys %{$bbhList}) { 
237 
# Get the genome ID. We want to find out if this genome is new. 
# Get the genome ID. We want to find out if this genome is new. 
238 
my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg); 
my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg); 
239 
if (! $alreadySeen{$genomeID}) { 
if (! exists $alreadySeen{$genomeID}) { 
240 
# It's new, so we check to see which set it's in. 
# It's new, so we check to see which set it's in. 
241 
if ($targetGenomes{$genomeID}) { 
if ($targetGenomes{$genomeID}) { 
242 

# It's in the target set. 
243 
$targetCount++; 
$targetCount++; 
244 

$alreadySeen{$genomeID} = 1; 
245 
} elsif ($exclusionGenomes{$genomeID}) { 
} elsif ($exclusionGenomes{$genomeID}) { 
246 

# It's in the exclusion set. 
247 
$exclusionCount++; 
$exclusionCount++; 
248 

$alreadySeen{$genomeID} = 2; 
249 

} 
250 

# Note that $alreadySeen{$genomeID} exists in the hash by this 
251 

# point. If it's 1, we need to save the current PEG. 
252 

if ($alreadySeen{$genomeID} == 1) { 
253 

$genesMatching{$bbhPeg} = 1; 
254 
} 
} 

# Make sure we don't look at it again. 


$alreadySeen{$genomeID} = 1; 

255 
} 
} 
256 
} 
} 
257 
# Create a variable to indicate whether or not we want to keep this feature. 
# Create a variable to indicate whether or not we want to keep this feature and 
258 
my $okFlag; 
# another for the score. 
259 

my ($okFlag, $score); 
260 
# We need to see if we're using statistics or not. This only matters 
# We need to see if we're using statistics or not. This only matters 
261 
# for a twoset situation. 
# for a twoset situation. 
262 
if ($statistical && $exclusionSetSize > 0) { 
if ($statistical && $exclusionSetSize > 0) { 
263 
# This looks like it has something to do with variance computations, 
# This is the magic formula for choosing the differentiating genes. It looks like 
264 
# but I'm not sure. 
# it has something to do with variance computations, but I'm not sure. 
265 
my $targetNotCount = $targetSetSize  $targetCount; 
my $targetNotCount = $targetSetSize  $targetCount; 
266 
my $targetSquare = $targetCount * $targetCount + $targetNotCount * $targetNotCount; 
my $targetSquare = $targetCount * $targetCount + $targetNotCount * $targetNotCount; 
267 
my $exclusionNotCount = $exclusionSetSize  $exclusionCount; 
my $exclusionNotCount = $exclusionSetSize  $exclusionCount; 
270 
my $inD = 1  (($exclusionSetSize * $mixed) / ($targetSetSize * $exclusionSquare)); 
my $inD = 1  (($exclusionSetSize * $mixed) / ($targetSetSize * $exclusionSquare)); 
271 
my $outD = 1  (($targetSetSize * $mixed) / ($exclusionSetSize * $targetSquare)); 
my $outD = 1  (($targetSetSize * $mixed) / ($exclusionSetSize * $targetSquare)); 
272 
# If the two differentials are greater than one, we keep this feature. 
# If the two differentials are greater than one, we keep this feature. 
273 
$okFlag = ($inD + $outD > 1); 
$score = $inD + $outD; 
274 

$okFlag = ($score > 1); 
275 
} else { 
} else { 
276 
# Check to see if we're common in set 1 and not in set 2. 
# Check to see if we're common in set 1 and not in set 2. 
277 
if (IsCommon($targetCount, $targetSetSize, $commonality) && 
my $score1 = IsCommon($targetCount, $targetSetSize, $commonality); 
278 
! IsCommon($exclusionCount, $exclusionSetSize, $commonality)) { 
my $score2 = IsCommon($exclusionCount, $exclusionSetSize, $commonality); 
279 
# We satisfy the criterion, so we put this feature to the output. 
if ($score1 && ! $score2) { 
280 

# We satisfy the criterion, so we put this feature to the output. The 
281 

# score is normalize to a range similar to the scores in the statistical 
282 

# method. 
283 

$score = $score1 + (1  $score2); 
284 
$okFlag = 1; 
$okFlag = 1; 
285 
} 
} 
286 
} 
} 
287 
if ($okFlag) { 
if ($okFlag) { 
288 
# Put this feature to the output. 
# Put this feature to the output. We have one or two extra columns. 
289 

# First we store the score. 
290 

$fquery>AddExtraColumns(score => sprintf("%.3f",$score)); 
291 

# Next we add the list of matching genes, but only if "showMatch" is specified. 
292 

if ($showMatch) { 
293 

# The matching genes are in the hash "genesMatching". 
294 

my @genes = sort { FIGRules::FIGCompare($a,$b) } keys %genesMatching; 
295 

# We need to linkify them. 
296 

my $genesHTML = join(", ", map { HTML::fid_link($cgi, $_) } @genes); 
297 

# Now add them as an extra column. 
298 

$fquery>AddExtraColumns(matches => $genesHTML); 
299 

} 
300 
$saveTime = time(); 
$saveTime = time(); 
301 
$self>PutFeature($fquery); 
$self>PutFeature($fquery); 
302 
$putTimer += time()  $saveTime; 
$putTimer += time()  $saveTime; 
342 


343 
=head3 IsCommon 
=head3 IsCommon 
344 


345 
C<< my $flag = SHSigGenes::IsCommon($count, $size, $commonality); >> 
C<< my $score = SHSigGenes::IsCommon($count, $size, $commonality); >> 
346 


347 
Return TRUE if a specified count indicates a gene is common in a specified set. 
Return the match score if a specified count indicates a gene is common in a specified set 
348 
Commonality is computed by dividing the count by the size of the set and 
and 0 otherwise. Commonality is computed by dividing the count by the size of the set and 
349 
comparing the result to the minimum commonality ratio. The one exception is 
comparing the result to the minimum commonality ratio. The one exception is 
350 
if the set size is 0. In that case, this method always returns FALSE. 
if the set size is 0. In that case, this method always returns 0. 
351 


352 
=over 4 
=over 4 
353 


378 
my $retVal = 0; 
my $retVal = 0; 
379 
# Only procced if the size is positive. 
# Only procced if the size is positive. 
380 
if ($size > 0) { 
if ($size > 0) { 
381 
$retVal = ($count/$size >= $commonality); 
# Compute the commonality. 
382 

$retVal = $count/$size; 
383 

# If it's too small, clear it. 
384 

if ($retVal < $commonality) { 
385 

$retVal = 0; 
386 

} 
387 
} 
} 
388 
# Return the result. 
# Return the result. 
389 
return $retVal; 
return $retVal; 