[Bio] / Sprout / SHSigGenes.pm Repository:
ViewVC logotype

Diff of /Sprout/SHSigGenes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Tue Sep 26 13:46:03 2006 UTC revision 1.9, Sat Dec 2 09:46:01 2006 UTC
# Line 61  Line 61 
61    
62  =head3 Form  =head3 Form
63    
64  C<< my $html = $shelp->Include(); >>  C<< my $html = $shelp->Form(); >>
65    
66  Generate the HTML for a form to request a new search.  Generate the HTML for a form to request a new search.
67    
# Line 80  Line 80 
80      # there is the selector for the given genome, the commonality and cutoff values,      # there is the selector for the given genome, the commonality and cutoff values,
81      # and the submit button. Our first task, then, is to get the genome selection      # and the submit button. Our first task, then, is to get the genome selection
82      # menus.      # menus.
83      my $givenMenu   = $self->NmpdrGenomeMenu('given', { size => 1 }, [$cgi->param('genome')]);      my $givenMenu   = $self->NmpdrGenomeMenu('given', 0, [$cgi->param('given')]);
84      my $targetMenu  = $self->NmpdrGenomeMenu('target', { size => 10, multiple => 'multiple' },      my $targetMenu  = $self->NmpdrGenomeMenu('target', 'multiple', [$cgi->param('target')], 10, 'exclusion');
85                                               [$cgi->param('target')]);      my $excludeMenu = $self->NmpdrGenomeMenu('exclusion', 'multiple', [$cgi->param('exclusion')], 10, 'target');
     my $excludeMenu = $self->NmpdrGenomeMenu('exclusion', { size => 10, multiple => 'multiple' },  
                                              [$cgi->param('exclusion')]);  
86      # Get the default values to use for the commonality and cutoff controls.      # Get the default values to use for the commonality and cutoff controls.
87      my $commonality = $cgi->param('commonality') || "0.8";      my $commonality = $cgi->param('commonality') || "0.8";
88      my $cutoff = $cgi->param('cutoff') || "1e-10";      my $cutoff = $cgi->param('cutoff') || "1e-10";
89      # Now we build the table rows. The top contains the two numeric parameters and      my $statistical = $cgi->param('statistical') || 1;
90      # the submit button.      # Now we build the table rows.
91      my @rows = ();      my @rows = ();
92        # First we have the given genome.
93        push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Reference Genome"),
94                             $cgi->td({colspan => 2}, $givenMenu));
95        # Now show the target and exclusion menus.
96        push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Inclusion Genomes (Set 1)"),
97                             $cgi->td({colspan => 2}, $targetMenu));
98        push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),
99                             $cgi->td({colspan => 2}, $excludeMenu));
100        # Next, the numeric parameters.
101      push @rows, $cgi->Tr($cgi->td("Commonality"),      push @rows, $cgi->Tr($cgi->td("Commonality"),
102                           $cgi->td($cgi->textfield(-name => 'commonality',                           $cgi->td($cgi->textfield(-name => 'commonality',
103                                                    -value => $commonality,                                                    -value => $commonality,
104                                                    -size => 5)));                                                    -size => 5))),
105                    $cgi->Tr($cgi->td(), $cgi->td(
106                                      $cgi->checkbox(-name => 'statistical',
107                                                     -checked => $statistical,
108                                                     -value => 1,
109                                                     -label => 'Use Statistical Algorithm')));
110      push @rows, $cgi->Tr($cgi->td("Cutoff"),      push @rows, $cgi->Tr($cgi->td("Cutoff"),
111                           $cgi->td($cgi->textfield(-name => 'cutoff',                           $cgi->td($cgi->textfield(-name => 'cutoff',
112                                                    -value => $cutoff,                                                    -value => $cutoff,
113                                                    -size => 5)));                                                    -size => 5)));
114        # Next, the feature filter rows.
115        push @rows, $self->FeatureFilterRows();
116        # Finally, the submit button.
117      push @rows, $self->SubmitRow();      push @rows, $self->SubmitRow();
     # The next rows have the three genome menus.  
     push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Given Genome"),  
                          $cgi->td({colspan => 2}, $givenMenu));  
     push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Target Genomes (Set 1)"),  
                          $cgi->td({colspan => 2}, $targetMenu));  
     push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),  
                          $cgi->td({colspan => 2}, $excludeMenu));  
118      # Create the table.      # Create the table.
119      $retVal .= $self->MakeTable(\@rows);      $retVal .= $self->MakeTable(\@rows);
120      # Close the form.      # Close the form.
# Line 132  Line 140 
140      # Get the sprout and CGI query objects.      # Get the sprout and CGI query objects.
141      my $cgi = $self->Q();      my $cgi = $self->Q();
142      my $sprout = $self->DB();      my $sprout = $self->DB();
143      # Declare the return variable.      # Declare the return variable. If it remains undefined, the caller will
144        # assume there was an error.
145      my $retVal;      my $retVal;
146        # Validate the numeric parameters.
147      # TODO: find stuff      my $commonality = $cgi->param('commonality');
148        my $cutoff = $cgi->param('cutoff');
149        if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {
150            $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");
151        } elsif ($commonality <= 0 || $commonality > 1) {
152            $self->SetMessage("Commonality cannot be 0 and cannot be greater than 1.");
153        } elsif ($cutoff !~ /^\s*\d(.\d+)?(e\-\d+)?\s*$/) {
154            $self->SetMessage("Cutoff must be an exponential number (e.g. \"1e-20\" or \"2.5e-11\".");
155        } elsif ($cutoff > 1) {
156            $self->SetMessage("Cutoff cannot be greater than 1.");
157        } else {
158            # Now we need to gather and validate the genome sets.
159            my ($givenGenomeID) = $self->GetGenomes('given');
160            my %targetGenomes = map { $_ => 1 } $self->GetGenomes('target');
161            my %exclusionGenomes = map { $_ => 1 } $self->GetGenomes('exclusion');
162            # Insure the given genome is not in the exclusion set.
163            if ($exclusionGenomes{$givenGenomeID}) {
164                $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");
165            } else {
166                # Insure the given genome is in the target set.
167                $targetGenomes{$givenGenomeID} = 1
168            }
169            # Find out if we want to use a statistical analysis.
170            my $statistical = $cgi->param('statistical') || 0;
171            # Denote we have not yet found any genomes.
172            $retVal = 0;
173            # Create a feature query object to loop through the chosen features of the given
174            # genome.
175            my $fquery = FeatureQuery->new($self, $givenGenomeID);
176            # Get the sizes of the two sets. This information is useful in computing commonality.
177            my $targetSetSize = scalar keys %targetGenomes;
178            my $exclusionSetSize = scalar keys %exclusionGenomes;
179            # Loop through the features.
180            while (my $record = $fquery->Fetch()) {
181                # Get the feature's ID.
182                my $fid = $fquery->FID();
183                # Request its list of BBHs. The list is actually a hash mapping each BBH to its
184                # score. All we care about, however, are the BBHs themselves.
185                my %bbhList = $sprout->LowBBHs($fid, $cutoff);
186                # We next wish to loop through the BBH IDs, counting how many are in each of the
187                # sets. If a genome occurs twice, we only want to count the first occurrence, so
188                # we have a hash of genomes we've already seen.
189                my %alreadySeen = ();
190                # Clear the counts.
191                my ($targetCount, $exclusionCount) = (0, 0);
192                # Loop through the BBHs.
193                for my $bbhPeg (keys %bbhList) {
194                    # Get the genome ID. We want to find out if this genome is new.
195                    my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg);
196                    if (! $alreadySeen{$genomeID}) {
197                        # It's new, so we check to see which set it's in.
198                        if ($targetGenomes{$genomeID}) {
199                            $targetCount++;
200                        } elsif ($exclusionGenomes{$genomeID}) {
201                            $exclusionCount++;
202                        }
203                        # Make sure we don't look at it again.
204                        $alreadySeen{$genomeID} = 1;
205                    }
206                }
207                # Create a variable to indicate whether or not we want to keep this feature.
208                my $okFlag;
209                # We need to see if we're using statistics or not. This only matters
210                # for a two-set situation.
211                if ($statistical && $exclusionSetSize > 0) {
212                    # This looks like it has something to do with variance computations,
213                    # but I'm not sure.
214                    my $targetNotCount = $targetSetSize - $targetCount;
215                    my $targetSquare = $targetCount * $targetCount + $targetNotCount * $targetNotCount;
216                    my $exclusionNotCount = $exclusionSetSize - $exclusionCount;
217                    my $exclusionSquare = $exclusionCount * $exclusionCount + $exclusionNotCount * $exclusionNotCount;
218                    my $mixed = $targetCount * $exclusionCount + $targetNotCount * $exclusionNotCount;
219                    my $inD = 1 - (($exclusionSetSize * $mixed) / ($targetSetSize * $exclusionSquare));
220                    my $outD = 1 - (($targetSetSize * $mixed) / ($exclusionSetSize * $targetSquare));
221                    # If the two differentials are greater than one, we keep this feature.
222                    $okFlag = ($inD + $outD > 1);
223                } else {
224                    # Check to see if we're common in set 1 and not in set 2.
225                    if (IsCommon($targetCount, $targetSetSize, $commonality) &&
226                        ! IsCommon($exclusionCount, $exclusionSetSize, $commonality)) {
227                        # We satisfy the criterion, so we put this feature to the output.
228                        $okFlag = 1;
229                    }
230                }
231                if ($okFlag) {
232                    # Put this feature to the output.
233                    $self->PutFeature($fquery);
234                    # Increase the result count.
235                    $retVal++;
236                }
237            }
238            # Close the session file.
239            $self->CloseSession();
240        }
241      # Return the result count.      # Return the result count.
242      return $retVal;      return $retVal;
243  }  }
# Line 155  Line 256 
256      # Get the parameters.      # Get the parameters.
257      my ($self) = @_;      my ($self) = @_;
258      # Return the result.      # Return the result.
259      return "Search for features that common to a group of organisms or that discriminate between two groups of organisms.";      return "Search for genes that are common to a group of organisms or that discriminate between two groups of organisms.";
260    }
261    
262    =head2 Internal Utilities
263    
264    =head3 IsCommon
265    
266    C<< my $flag = SHSigGenes::IsCommon($count, $size, $commonality); >>
267    
268    Return TRUE if a specified count indicates a gene is common in a specified set.
269    Commonality is computed by dividing the count by the size of the set and
270    comparing the result to the minimum commonality ratio. The one exception is
271    if the set size is 0. In that case, this method always returns FALSE.
272    
273    =over 4
274    
275    =item count
276    
277    Number of elements of the set that have the relevant characteristic.
278    
279    =item size
280    
281    Total number of elements in the set.
282    
283    =item commonality
284    
285    Minimum count/size ratio for the characteristic to be considered common.
286    
287    =item RETURN
288    
289    Returns TRUE if the characteristic is common, else FALSE.
290    
291    =back
292    
293    =cut
294    
295    sub IsCommon {
296        # Get the parameters.
297        my ($count, $size, $commonality) = @_;
298        # Declare the return variable.
299        my $retVal = 0;
300        # Only procced if the size is positive.
301        if ($size > 0) {
302            $retVal = ($count/$size >= $commonality);
303        }
304        # Return the result.
305        return $retVal;
306  }  }
307    
308  1;  1;

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.9

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3