[Bio] / Sprout / SHSigGenes.pm Repository:
ViewVC logotype

Diff of /Sprout/SHSigGenes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Tue Sep 26 13:46:03 2006 UTC revision 1.4, Wed Oct 4 16:03:55 2006 UTC
# Line 80  Line 80 
80      # there is the selector for the given genome, the commonality and cutoff values,      # there is the selector for the given genome, the commonality and cutoff values,
81      # and the submit button. Our first task, then, is to get the genome selection      # and the submit button. Our first task, then, is to get the genome selection
82      # menus.      # menus.
83      my $givenMenu   = $self->NmpdrGenomeMenu('given', { size => 1 }, [$cgi->param('genome')]);      my $givenMenu   = $self->NmpdrGenomeMenu('given', 0, [$cgi->param('genome')]);
84      my $targetMenu  = $self->NmpdrGenomeMenu('target', { size => 10, multiple => 'multiple' },      my $targetMenu  = $self->NmpdrGenomeMenu('target', 'multiple', [$cgi->param('target')], 10, 'exclusion');
85                                               [$cgi->param('target')]);      my $excludeMenu = $self->NmpdrGenomeMenu('exclusion', 'multiple', [$cgi->param('exclusion')], 10, 'target');
     my $excludeMenu = $self->NmpdrGenomeMenu('exclusion', { size => 10, multiple => 'multiple' },  
                                              [$cgi->param('exclusion')]);  
86      # Get the default values to use for the commonality and cutoff controls.      # Get the default values to use for the commonality and cutoff controls.
87      my $commonality = $cgi->param('commonality') || "0.8";      my $commonality = $cgi->param('commonality') || "0.8";
88      my $cutoff = $cgi->param('cutoff') || "1e-10";      my $cutoff = $cgi->param('cutoff') || "1e-10";
# Line 100  Line 98 
98                                                    -value => $cutoff,                                                    -value => $cutoff,
99                                                    -size => 5)));                                                    -size => 5)));
100      push @rows, $self->SubmitRow();      push @rows, $self->SubmitRow();
101      # The next rows have the three genome menus.      # The next rows have the given genome and a feature filter.
102      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Given Genome"),      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Given Genome"),
103                           $cgi->td({colspan => 2}, $givenMenu));                           $cgi->td({colspan => 2}, $givenMenu));
104        push @rows, $self->FeatureFilterRows();
105        # Now show the target and exclusion menus.
106      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Target Genomes (Set 1)"),      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Target Genomes (Set 1)"),
107                           $cgi->td({colspan => 2}, $targetMenu));                           $cgi->td({colspan => 2}, $targetMenu));
108      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),
# Line 132  Line 132 
132      # Get the sprout and CGI query objects.      # Get the sprout and CGI query objects.
133      my $cgi = $self->Q();      my $cgi = $self->Q();
134      my $sprout = $self->DB();      my $sprout = $self->DB();
135      # Declare the return variable.      # Declare the return variable. If it remains undefined, the caller will
136        # assume there was an error.
137      my $retVal;      my $retVal;
138        # Validate the numeric parameters.
139      # TODO: find stuff      my $commonality = $cgi->param('commonality');
140        my $cutoff = $cgi->param('cutoff');
141        if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {
142            $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");
143        } elsif ($commonality <= 0 || $commonality > 1) {
144            $self->SetMessage("Commonality cannot be 0 and cannot be greater than 1.");
145        } elsif ($cutoff !~ /^\s*\d(.\d+)?(e\-\d+)?\s*$/) {
146            $self->SetMessage("Cutoff must be an exponential number (e.g. \"1e-20\" or \"2.5e-11\".");
147        } elsif ($cutoff > 1) {
148            $self->SetMessage("Cutoff cannot be greater than 1.");
149        } else {
150            # Now we need to gather and validate the genome sets.
151            my ($givenGenomeID) = $self->GetGenomes('given');
152            my %targetGenomes = map { $_ => 1 } $self->GetGenomes('target');
153            my %exclusionGenomes = map { $_ => 1 } $self->GetGenomes('exclusion');
154            # Insure the given genome is not in the exclusion set.
155            if ($exclusionGenomes{$givenGenomeID}) {
156                $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");
157            } else {
158                # Insure the given genome is in the target set.
159                $targetGenomes{$givenGenomeID} = 1
160            }
161            # Denote we have not yet found any genomes.
162            $retVal = 0;
163            # Create a feature query object to loop through the chosen features of the given
164            # genome.
165            my $fquery = FeatureQuery->new($self, $givenGenomeID);
166            # Get the sizes of the two sets. This information is useful in computing commonality.
167            my $targetSetSize = scalar keys %targetGenomes;
168            my $exclusionSetSize = scalar keys %exclusionGenomes;
169            # Loop through the features.
170            while (my $record = $fquery->Fetch()) {
171                # Get the feature's ID.
172                my $fid = $fquery->FID();
173                Trace("Processing feature $fid.") if T(3);
174                # Request its list of BBHs. The list is actually a hash mapping each BBH to its
175                # score. All we care about, however, are the BBHs themselves.
176                my %bbhList = $sprout->LowBBHs($fid, $cutoff);
177                # We next wish to loop through the BBH IDs, counting how many are in each of the
178                # sets. If a genome occurs twice, we only want to count the first occurrence, so
179                # we have a hash of genomes we've already seen.
180                my %alreadySeen = ();
181                # Clear the counts.
182                my ($targetCount, $exclusionCount) = (0, 0);
183                # Loop through the BBHs.
184                for my $bbhPeg (keys %bbhList) {
185                    # Get the genome ID. We want to find out if this genome is new.
186                    my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg);
187                    if (! $alreadySeen{$genomeID}) {
188                        # It's new, so we check to see which set it's in.
189                        if ($targetGenomes{$genomeID}) {
190                            $targetCount++;
191                        } elsif ($exclusionGenomes{$genomeID}) {
192                            $exclusionCount++;
193                        }
194                        # Make sure we don't look at it again.
195                        $alreadySeen{$genomeID} = 1;
196                    }
197                }
198                # Check to see if we're common in set 1 and not in set 2.
199                if (IsCommon($targetCount, $targetSetSize, $commonality) &&
200                    ! IsCommon($exclusionCount, $exclusionSetSize, $commonality)) {
201                    # We satisfy the criterion, so we put this feature to the output.
202                    $self->PutFeature($fquery);
203                    # Increase the result count.
204                    $retVal++;
205                }
206            }
207        }
208      # Return the result count.      # Return the result count.
209      return $retVal;      return $retVal;
210  }  }
# Line 155  Line 223 
223      # Get the parameters.      # Get the parameters.
224      my ($self) = @_;      my ($self) = @_;
225      # Return the result.      # Return the result.
226      return "Search for features that common to a group of organisms or that discriminate between two groups of organisms.";      return "Search for features that are common to a group of organisms or that discriminate between two groups of organisms.";
227    }
228    
229    =head2 Internal Utilities
230    
231    =head3 IsCommon
232    
233    C<< my $flag = SHSigGenes::IsCommon($count, $size, $commonality); >>
234    
235    Return TRUE if a specified count indicates a gene is common in a specified set.
236    Commonality is computed by dividing the count by the size of the set and
237    comparing the result to the minimum commonality ratio. The one exception is
238    if the set size is 0. In that case, this method always returns FALSE.
239    
240    =over 4
241    
242    =item count
243    
244    Number of elements of the set that have the relevant characteristic.
245    
246    =item size
247    
248    Total number of elements in the set.
249    
250    =item commonality
251    
252    Minimum count/size ratio for the characteristic to be considered common.
253    
254    =item RETURN
255    
256    Returns TRUE if the characteristic is common, else FALSE.
257    
258    =back
259    
260    =cut
261    
262    sub IsCommon {
263        # Get the parameters.
264        my ($count, $size, $commonality) = @_;
265        # Declare the return variable.
266        my $retVal = 0;
267        # Only procced if the size is positive.
268        if ($size > 0) {
269            $retVal = ($count/$size >= $commonality);
270        }
271        # Return the result.
272        return $retVal;
273  }  }
274    
275  1;  1;

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.4

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3