[Bio] / Sprout / SHSigGenes.pm Repository:
ViewVC logotype

Diff of /Sprout/SHSigGenes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.3, Fri Sep 29 15:10:05 2006 UTC revision 1.10, Wed Dec 6 03:36:04 2006 UTC
# Line 8  Line 8 
8      use CGI;      use CGI;
9      use HTML;      use HTML;
10      use Sprout;      use Sprout;
11        use Time::HiRes;
12    
13      our @ISA = qw(SearchHelper);      our @ISA = qw(SearchHelper);
14    
# Line 61  Line 62 
62    
63  =head3 Form  =head3 Form
64    
65  C<< my $html = $shelp->Include(); >>  C<< my $html = $shelp->Form(); >>
66    
67  Generate the HTML for a form to request a new search.  Generate the HTML for a form to request a new search.
68    
# Line 80  Line 81 
81      # there is the selector for the given genome, the commonality and cutoff values,      # there is the selector for the given genome, the commonality and cutoff values,
82      # and the submit button. Our first task, then, is to get the genome selection      # and the submit button. Our first task, then, is to get the genome selection
83      # menus.      # menus.
84      my $givenMenu   = $self->NmpdrGenomeMenu('given', 0, [$cgi->param('genome')]);      my $givenMenu   = $self->NmpdrGenomeMenu('given', 0, [$cgi->param('given')]);
85      my $targetMenu  = $self->NmpdrGenomeMenu('target', 'multiple', [$cgi->param('target')]);      my $targetMenu  = $self->NmpdrGenomeMenu('target', 'multiple', [$cgi->param('target')], 10, 'exclusion');
86      my $excludeMenu = $self->NmpdrGenomeMenu('exclusion', 'multiple', [$cgi->param('exclusion')]);      my $excludeMenu = $self->NmpdrGenomeMenu('exclusion', 'multiple', [$cgi->param('exclusion')], 10, 'target');
87      # Get the default values to use for the commonality and cutoff controls.      # Get the default values to use for the commonality and cutoff controls.
88      my $commonality = $cgi->param('commonality') || "0.8";      my $commonality = $cgi->param('commonality') || "0.8";
89      my $cutoff = $cgi->param('cutoff') || "1e-10";      my $cutoff = $cgi->param('cutoff') || "1e-10";
90      # Now we build the table rows. The top contains the two numeric parameters and      my $statistical = $cgi->param('statistical') || 1;
91      # the submit button.      # Now we build the table rows.
92      my @rows = ();      my @rows = ();
93        # First we have the given genome.
94        push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Reference Genome"),
95                             $cgi->td({colspan => 2}, $givenMenu));
96        # Now show the target and exclusion menus.
97        push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Inclusion Genomes (Set 1)"),
98                             $cgi->td({colspan => 2}, $targetMenu));
99        push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),
100                             $cgi->td({colspan => 2}, $excludeMenu));
101        # Next, the numeric parameters.
102      push @rows, $cgi->Tr($cgi->td("Commonality"),      push @rows, $cgi->Tr($cgi->td("Commonality"),
103                           $cgi->td($cgi->textfield(-name => 'commonality',                           $cgi->td($cgi->textfield(-name => 'commonality',
104                                                    -value => $commonality,                                                    -value => $commonality,
105                                                    -size => 5)));                                                    -size => 5))),
106                    $cgi->Tr($cgi->td(), $cgi->td(
107                                      $cgi->checkbox(-name => 'statistical',
108                                                     -checked => $statistical,
109                                                     -value => 1,
110                                                     -label => 'Use Statistical Algorithm')));
111      push @rows, $cgi->Tr($cgi->td("Cutoff"),      push @rows, $cgi->Tr($cgi->td("Cutoff"),
112                           $cgi->td($cgi->textfield(-name => 'cutoff',                           $cgi->td($cgi->textfield(-name => 'cutoff',
113                                                    -value => $cutoff,                                                    -value => $cutoff,
114                                                    -size => 5)));                                                    -size => 5)));
115      push @rows, $self->SubmitRow();      # Next, the feature filter rows.
     # The next rows have the given genome and a feature filter.  
     push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Given Genome"),  
                          $cgi->td({colspan => 2}, $givenMenu));  
116      push @rows, $self->FeatureFilterRows();      push @rows, $self->FeatureFilterRows();
117      # Now show the target and exclusion menus.      # Finally, the submit button.
118      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Target Genomes (Set 1)"),      push @rows, $self->SubmitRow();
                          $cgi->td({colspan => 2}, $targetMenu));  
     push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),  
                          $cgi->td({colspan => 2}, $excludeMenu));  
119      # Create the table.      # Create the table.
120      $retVal .= $self->MakeTable(\@rows);      $retVal .= $self->MakeTable(\@rows);
121      # Close the form.      # Close the form.
# Line 135  Line 144 
144      # Declare the return variable. If it remains undefined, the caller will      # Declare the return variable. If it remains undefined, the caller will
145      # assume there was an error.      # assume there was an error.
146      my $retVal;      my $retVal;
147        # Create the timers.
148        my ($saveTime, $loopCounter, $bbhTimer, $putTimer, $queryTimer) = (0, 0, 0, 0, 0);
149      # Validate the numeric parameters.      # Validate the numeric parameters.
150      my $commonality = $cgi->param('commonality');      my $commonality = $cgi->param('commonality');
151      my $cutoff = $cgi->param('cutoff');      my $cutoff = $cgi->param('cutoff');
# Line 158  Line 169 
169              # Insure the given genome is in the target set.              # Insure the given genome is in the target set.
170              $targetGenomes{$givenGenomeID} = 1              $targetGenomes{$givenGenomeID} = 1
171          }          }
172            # Find out if we want to use a statistical analysis.
173            my $statistical = $cgi->param('statistical') || 0;
174          # Denote we have not yet found any genomes.          # Denote we have not yet found any genomes.
175          $retVal = 0;          $retVal = 0;
176          #TODO: find stuff          # Compute the list of genomes of interest.
177            my @allGenomes = (keys %exclusionGenomes, keys %targetGenomes);
178            # Get the BBH matrix.
179            $saveTime = time();
180            my %bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);
181            $bbhTimer += time() - $saveTime;
182            # Create a feature query object to loop through the chosen features of the given
183            # genome.
184            Trace("Creating feature query.") if T(3);
185            $saveTime = time();
186            my $fquery = FeatureQuery->new($self, $givenGenomeID);
187            $queryTimer += time() - $saveTime;
188            # Get the sizes of the two sets. This information is useful in computing commonality.
189            my $targetSetSize = scalar keys %targetGenomes;
190            my $exclusionSetSize = scalar keys %exclusionGenomes;
191            # Loop through the features.
192            my $done = 0;
193            while (! $done) {
194                # Get the next feature.
195                $saveTime = time();
196                my $record = $fquery->Fetch();
197                $queryTimer += time() - $saveTime;
198                if (! $record) {
199                    $done = 1;
200                } else {
201                    # Get the feature's ID.
202                    my $fid = $fquery->FID();
203                    Trace("Processing feature $fid.") if T(4);
204                    # Get its list of BBHs. The list is actually a hash mapping each BBH to its
205                    # score. All we care about, however, are the BBHs themselves.
206                    my $bbhList = $bbhMatrix{$fid};
207                    # We next wish to loop through the BBH IDs, counting how many are in each of the
208                    # sets. If a genome occurs twice, we only want to count the first occurrence, so
209                    # we have a hash of genomes we've already seen.
210                    my %alreadySeen = ();
211                    # Clear the exclusion count.
212                    my $exclusionCount = 0;
213                    # Denote that we're in our own genome.
214                    $alreadySeen{$givenGenomeID} = 1;
215                    my $targetCount = 1;
216                    # Loop through the BBHs.
217                    for my $bbhPeg (keys %{$bbhList}) {
218                        # Get the genome ID. We want to find out if this genome is new.
219                        my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg);
220                        if (! $alreadySeen{$genomeID}) {
221                            # It's new, so we check to see which set it's in.
222                            if ($targetGenomes{$genomeID}) {
223                                $targetCount++;
224                            } elsif ($exclusionGenomes{$genomeID}) {
225                                $exclusionCount++;
226                            }
227                            # Make sure we don't look at it again.
228                            $alreadySeen{$genomeID} = 1;
229                        }
230                    }
231                    # Create a variable to indicate whether or not we want to keep this feature.
232                    my $okFlag;
233                    # We need to see if we're using statistics or not. This only matters
234                    # for a two-set situation.
235                    if ($statistical && $exclusionSetSize > 0) {
236                        # This looks like it has something to do with variance computations,
237                        # but I'm not sure.
238                        my $targetNotCount = $targetSetSize - $targetCount;
239                        my $targetSquare = $targetCount * $targetCount + $targetNotCount * $targetNotCount;
240                        my $exclusionNotCount = $exclusionSetSize - $exclusionCount;
241                        my $exclusionSquare = $exclusionCount * $exclusionCount + $exclusionNotCount * $exclusionNotCount;
242                        my $mixed = $targetCount * $exclusionCount + $targetNotCount * $exclusionNotCount;
243                        my $inD = 1 - (($exclusionSetSize * $mixed) / ($targetSetSize * $exclusionSquare));
244                        my $outD = 1 - (($targetSetSize * $mixed) / ($exclusionSetSize * $targetSquare));
245                        # If the two differentials are greater than one, we keep this feature.
246                        $okFlag = ($inD + $outD > 1);
247                    } else {
248                        # Check to see if we're common in set 1 and not in set 2.
249                        if (IsCommon($targetCount, $targetSetSize, $commonality) &&
250                            ! IsCommon($exclusionCount, $exclusionSetSize, $commonality)) {
251                            # We satisfy the criterion, so we put this feature to the output.
252                            $okFlag = 1;
253                        }
254      }      }
255                    if ($okFlag) {
256                        # Put this feature to the output.
257                        $saveTime = time();
258                        $self->PutFeature($fquery);
259                        $putTimer += time() - $saveTime;
260                        # Increase the result count.
261                        $retVal++;
262                    }
263                    # Check for a timer trace. We trace every 500 features.
264                    $loopCounter++;
265                    if (T(3) && $loopCounter % 500 == 0) {
266                        Trace("Time spent for $loopCounter features: Put = $putTimer, Query = $queryTimer, BBH = $bbhTimer.");
267                    }
268                }
269            }
270            # Close the session file.
271            $saveTime = time();
272            $self->CloseSession();
273            $putTimer += time() - $saveTime;
274        }
275        # Trace the timers.
276        Trace("Time spent: Put = $putTimer, Query = $queryTimer, BBH = $bbhTimer.") if T(3);
277      # Return the result count.      # Return the result count.
278      return $retVal;      return $retVal;
279  }  }
# Line 180  Line 292 
292      # Get the parameters.      # Get the parameters.
293      my ($self) = @_;      my ($self) = @_;
294      # Return the result.      # Return the result.
295      return "Search for features that are common to a group of organisms or that discriminate between two groups of organisms.";      return "Search for genes that are common to a group of organisms or that discriminate between two groups of organisms.";
296    }
297    
298    =head2 Internal Utilities
299    
300    =head3 IsCommon
301    
302    C<< my $flag = SHSigGenes::IsCommon($count, $size, $commonality); >>
303    
304    Return TRUE if a specified count indicates a gene is common in a specified set.
305    Commonality is computed by dividing the count by the size of the set and
306    comparing the result to the minimum commonality ratio. The one exception is
307    if the set size is 0. In that case, this method always returns FALSE.
308    
309    =over 4
310    
311    =item count
312    
313    Number of elements of the set that have the relevant characteristic.
314    
315    =item size
316    
317    Total number of elements in the set.
318    
319    =item commonality
320    
321    Minimum count/size ratio for the characteristic to be considered common.
322    
323    =item RETURN
324    
325    Returns TRUE if the characteristic is common, else FALSE.
326    
327    =back
328    
329    =cut
330    
331    sub IsCommon {
332        # Get the parameters.
333        my ($count, $size, $commonality) = @_;
334        # Declare the return variable.
335        my $retVal = 0;
336        # Only procced if the size is positive.
337        if ($size > 0) {
338            $retVal = ($count/$size >= $commonality);
339        }
340        # Return the result.
341        return $retVal;
342  }  }
343    
344  1;  1;

Legend:
Removed from v.1.3  
changed lines
  Added in v.1.10

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3