[Bio] / Sprout / SHSigGenes.pm Repository:
ViewVC logotype

Diff of /Sprout/SHSigGenes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Wed Sep 27 16:55:38 2006 UTC revision 1.7, Wed Nov 15 12:13:40 2006 UTC
# Line 61  Line 61 
61    
62  =head3 Form  =head3 Form
63    
64  C<< my $html = $shelp->Include(); >>  C<< my $html = $shelp->Form(); >>
65    
66  Generate the HTML for a form to request a new search.  Generate the HTML for a form to request a new search.
67    
# Line 80  Line 80 
80      # there is the selector for the given genome, the commonality and cutoff values,      # there is the selector for the given genome, the commonality and cutoff values,
81      # and the submit button. Our first task, then, is to get the genome selection      # and the submit button. Our first task, then, is to get the genome selection
82      # menus.      # menus.
83      my $givenMenu   = $self->NmpdrGenomeMenu('given', { size => 1 }, [$cgi->param('genome')]);      my $givenMenu   = $self->NmpdrGenomeMenu('given', 0, [$cgi->param('given')]);
84      my $targetMenu  = $self->NmpdrGenomeMenu('target', { size => 10, multiple => 'multiple' },      my $targetMenu  = $self->NmpdrGenomeMenu('target', 'multiple', [$cgi->param('target')], 10, 'exclusion');
85                                               [$cgi->param('target')]);      my $excludeMenu = $self->NmpdrGenomeMenu('exclusion', 'multiple', [$cgi->param('exclusion')], 10, 'target');
     my $excludeMenu = $self->NmpdrGenomeMenu('exclusion', { size => 10, multiple => 'multiple' },  
                                              [$cgi->param('exclusion')]);  
86      # Get the default values to use for the commonality and cutoff controls.      # Get the default values to use for the commonality and cutoff controls.
87      my $commonality = $cgi->param('commonality') || "0.8";      my $commonality = $cgi->param('commonality') || "0.8";
88      my $cutoff = $cgi->param('cutoff') || "1e-10";      my $cutoff = $cgi->param('cutoff') || "1e-10";
89        my $statistical = $cgi->param('statistical') || 1;
90      # Now we build the table rows. The top contains the two numeric parameters and      # Now we build the table rows. The top contains the two numeric parameters and
91      # the submit button.      # the submit button.
92      my @rows = ();      my @rows = ();
93      push @rows, $cgi->Tr($cgi->td("Commonality"),      push @rows, $cgi->Tr($cgi->td("Commonality"),
94                           $cgi->td($cgi->textfield(-name => 'commonality',                           $cgi->td($cgi->textfield(-name => 'commonality',
95                                                    -value => $commonality,                                                    -value => $commonality,
96                                                    -size => 5)));                                                    -size => 5) . "&nbsp;" .
97                                      $cgi->checkbox(-name => 'statistical',
98                                                     -checked => $statistical,
99                                                     -value => 1,
100                                                     -label => 'Use Statistical Algorithm')));
101      push @rows, $cgi->Tr($cgi->td("Cutoff"),      push @rows, $cgi->Tr($cgi->td("Cutoff"),
102                           $cgi->td($cgi->textfield(-name => 'cutoff',                           $cgi->td($cgi->textfield(-name => 'cutoff',
103                                                    -value => $cutoff,                                                    -value => $cutoff,
# Line 150  Line 153 
153          $self->SetMessage("Cutoff cannot be greater than 1.");          $self->SetMessage("Cutoff cannot be greater than 1.");
154      } else {      } else {
155          # Now we need to gather and validate the genome sets.          # Now we need to gather and validate the genome sets.
156          my $givenGenomeID = $cgi->param('given');          my ($givenGenomeID) = $self->GetGenomes('given');
157          my %targetGenomes = map { $_ => 1 } $cgi->param('target');          my %targetGenomes = map { $_ => 1 } $self->GetGenomes('target');
158          my %exclusionGenomes = map { $_ => 1 } $cgi->param('exclusion');          my %exclusionGenomes = map { $_ => 1 } $self->GetGenomes('exclusion');
159          # Insure the given genome is not in the exclusion set.          # Insure the given genome is not in the exclusion set.
160          if ($exclusionGenomes{$givenGenomeID}) {          if ($exclusionGenomes{$givenGenomeID}) {
161              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");
# Line 160  Line 163 
163              # Insure the given genome is in the target set.              # Insure the given genome is in the target set.
164              $targetGenomes{$givenGenomeID} = 1              $targetGenomes{$givenGenomeID} = 1
165          }          }
166            # Find out if we want to use a statistical analysis.
167            my $statistical = $cgi->param('statistical') || 0;
168          # Denote we have not yet found any genomes.          # Denote we have not yet found any genomes.
169          $retVal = 0;          $retVal = 0;
170          #TODO: find stuff          # Create a feature query object to loop through the chosen features of the given
171            # genome.
172            my $fquery = FeatureQuery->new($self, $givenGenomeID);
173            # Get the sizes of the two sets. This information is useful in computing commonality.
174            my $targetSetSize = scalar keys %targetGenomes;
175            my $exclusionSetSize = scalar keys %exclusionGenomes;
176            # Loop through the features.
177            while (my $record = $fquery->Fetch()) {
178                # Get the feature's ID.
179                my $fid = $fquery->FID();
180                # Request its list of BBHs. The list is actually a hash mapping each BBH to its
181                # score. All we care about, however, are the BBHs themselves.
182                my %bbhList = $sprout->LowBBHs($fid, $cutoff);
183                # We next wish to loop through the BBH IDs, counting how many are in each of the
184                # sets. If a genome occurs twice, we only want to count the first occurrence, so
185                # we have a hash of genomes we've already seen.
186                my %alreadySeen = ();
187                # Clear the counts.
188                my ($targetCount, $exclusionCount) = (0, 0);
189                # Loop through the BBHs.
190                for my $bbhPeg (keys %bbhList) {
191                    # Get the genome ID. We want to find out if this genome is new.
192                    my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg);
193                    if (! $alreadySeen{$genomeID}) {
194                        # It's new, so we check to see which set it's in.
195                        if ($targetGenomes{$genomeID}) {
196                            $targetCount++;
197                        } elsif ($exclusionGenomes{$genomeID}) {
198                            $exclusionCount++;
199                        }
200                        # Make sure we don't look at it again.
201                        $alreadySeen{$genomeID} = 1;
202                    }
203                }
204                # Create a variable to indicate whether or not we want to keep this feature.
205                my $okFlag;
206                # We need to see if we're using statistics or not. This only matters
207                # for a two-set situation.
208                if ($statistical && $exclusionSetSize > 0) {
209                    # This looks like it has something to do with variance computations,
210                    # but I'm not sure.
211                    my $targetNotCount = $targetSetSize - $targetCount;
212                    my $targetSquare = $targetCount * $targetCount + $targetNotCount * $targetNotCount;
213                    my $exclusionNotCount = $exclusionSetSize - $exclusionCount;
214                    my $exclusionSquare = $exclusionCount * $exclusionCount + $exclusionNotCount * $exclusionNotCount;
215                    my $mixed = $targetCount * $exclusionCount + $targetNotCount * $exclusionNotCount;
216                    my $inD = 1 - (($exclusionSetSize * $mixed) / ($targetSetSize * $exclusionSquare));
217                    my $outD = 1 - (($targetSetSize * $mixed) / ($exclusionSetSize * $targetSquare));
218                    # If the two differentials are greater than one, we keep this feature.
219                    $okFlag = ($inD + $outD > 1);
220                } else {
221                    # Check to see if we're common in set 1 and not in set 2.
222                    if (IsCommon($targetCount, $targetSetSize, $commonality) &&
223                        ! IsCommon($exclusionCount, $exclusionSetSize, $commonality)) {
224                        # We satisfy the criterion, so we put this feature to the output.
225                        $okFlag = 1;
226                    }
227                }
228                if ($okFlag) {
229                    # Put this feature to the output.
230                    $self->PutFeature($fquery);
231                    # Increase the result count.
232                    $retVal++;
233                }
234            }
235            # Close the session file.
236            $self->CloseSession();
237      }      }
238      # Return the result count.      # Return the result count.
239      return $retVal;      return $retVal;
# Line 182  Line 253 
253      # Get the parameters.      # Get the parameters.
254      my ($self) = @_;      my ($self) = @_;
255      # Return the result.      # Return the result.
256      return "Search for features that common to a group of organisms or that discriminate between two groups of organisms.";      return "Search for features that are common to a group of organisms or that discriminate between two groups of organisms.";
257    }
258    
259    =head2 Internal Utilities
260    
261    =head3 IsCommon
262    
263    C<< my $flag = SHSigGenes::IsCommon($count, $size, $commonality); >>
264    
265    Return TRUE if a specified count indicates a gene is common in a specified set.
266    Commonality is computed by dividing the count by the size of the set and
267    comparing the result to the minimum commonality ratio. The one exception is
268    if the set size is 0. In that case, this method always returns FALSE.
269    
270    =over 4
271    
272    =item count
273    
274    Number of elements of the set that have the relevant characteristic.
275    
276    =item size
277    
278    Total number of elements in the set.
279    
280    =item commonality
281    
282    Minimum count/size ratio for the characteristic to be considered common.
283    
284    =item RETURN
285    
286    Returns TRUE if the characteristic is common, else FALSE.
287    
288    =back
289    
290    =cut
291    
292    sub IsCommon {
293        # Get the parameters.
294        my ($count, $size, $commonality) = @_;
295        # Declare the return variable.
296        my $retVal = 0;
297        # Only procced if the size is positive.
298        if ($size > 0) {
299            $retVal = ($count/$size >= $commonality);
300        }
301        # Return the result.
302        return $retVal;
303  }  }
304    
305  1;  1;

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3