[Bio] / Sprout / SHSigGenes.pm Repository:
ViewVC logotype

Diff of /Sprout/SHSigGenes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.11, Wed Feb 21 13:20:37 2007 UTC revision 1.15, Thu May 17 23:44:21 2007 UTC
# Line 95  Line 95 
95      my $cutoff = $cgi->param('cutoff') || "1e-10";      my $cutoff = $cgi->param('cutoff') || "1e-10";
96      my $statistical = $cgi->param('statistical') || 1;      my $statistical = $cgi->param('statistical') || 1;
97      my $showMatch = $cgi->param('showMatch') || 0;      my $showMatch = $cgi->param('showMatch') || 0;
98        my $useSims = $cgi->param('useSims') || 0;
99        my $pegsOnly = $cgi->param('pegsOnly') || 1;
100      # Now we build the table rows.      # Now we build the table rows.
101      my @rows = ();      my @rows = ();
102      # First we have the given genome.      # First we have the given genome.
# Line 105  Line 107 
107                           $cgi->td({colspan => 2}, $targetMenu));                           $cgi->td({colspan => 2}, $targetMenu));
108      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),
109                           $cgi->td({colspan => 2}, $excludeMenu));                           $cgi->td({colspan => 2}, $excludeMenu));
110      # Next, the numeric parameters.      # Next, the tuning parameters.
111      push @rows, $cgi->Tr($cgi->td("Commonality"),      push @rows, $cgi->Tr($cgi->td("Commonality"),
112                           $cgi->td($cgi->textfield(-name => 'commonality',                           $cgi->td($cgi->textfield(-name => 'commonality',
113                                                    -value => $commonality,                                                    -value => $commonality,
# Line 114  Line 116 
116                                    $cgi->checkbox(-name => 'statistical',                                    $cgi->checkbox(-name => 'statistical',
117                                                   -checked => $statistical,                                                   -checked => $statistical,
118                                                   -value => 1,                                                   -value => 1,
119                                                   -label => 'Use Statistical Algorithm')),                                                   -label => 'Use Statistical Algorithm'),
120                                      $cgi->checkbox(-name => 'useSims',
121                                                     -checked => $useSims,
122                                                     -value => 1,
123                                                     -label => 'Use Similarities')))),
124                    $cgi->Tr($cgi->td(), $cgi->td(join(" ",
125                                    $cgi->checkbox(-name => 'showMatch',                                    $cgi->checkbox(-name => 'showMatch',
126                                                   -checked => $showMatch,                                                   -checked => $showMatch,
127                                                   -value => 1,                                                   -value => 1,
128                                                   -label => 'Show Matching Genes')));                                                   -label => 'Show Matching Genes'),
129      push @rows, $cgi->Tr($cgi->td("Cutoff"),  #                                  $cgi->checkbox(-name => 'pegsOnly',
130    #                                                 -checked => $pegsOnly,
131    #                                                 -value => 1,
132    #                                                 -label => 'PEGs Only')
133                                      ))),
134                    $cgi->Tr($cgi->td("Cutoff"),
135                           $cgi->td($cgi->textfield(-name => 'cutoff',                           $cgi->td($cgi->textfield(-name => 'cutoff',
136                                                    -value => $cutoff,                                                    -value => $cutoff,
137                                                    -size => 5)));                                                    -size => 5)));
# Line 156  Line 168 
168      # assume there was an error.      # assume there was an error.
169      my $retVal;      my $retVal;
170      # Denote the extra columns go at the end.      # Denote the extra columns go at the end.
171      $self->SetExtraPos(1);      $self->SetExtraPos('score');
172      # Create the timers.      # Create the timers.
173      my ($saveTime, $loopCounter, $bbhTimer, $putTimer, $queryTimer) = (0, 0, 0, 0, 0);      my ($saveTime, $loopCounter, $bbhTimer, $putTimer, $queryTimer) = (0, 0, 0, 0, 0);
174      # Validate the numeric parameters.      # Validate the numeric parameters.
175      my $commonality = $cgi->param('commonality');      my $commonality = $cgi->param('commonality');
176      my $cutoff = $cgi->param('cutoff');      my $cutoff = $cgi->param('cutoff');
177        my $pegsOnly = $cgi->param('pegsOnly') || 1;
178      if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {      if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {
179          $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");          $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");
180      } elsif ($commonality <= 0 || $commonality > 1) {      } elsif ($commonality <= 0 || $commonality > 1) {
# Line 172  Line 185 
185          $self->SetMessage("Cutoff cannot be greater than 1.");          $self->SetMessage("Cutoff cannot be greater than 1.");
186      } else {      } else {
187          # Now we need to gather and validate the genome sets.          # Now we need to gather and validate the genome sets.
188            $self->PrintLine("Gathering the target genomes.  ");
189          my ($givenGenomeID) = $self->GetGenomes('given');          my ($givenGenomeID) = $self->GetGenomes('given');
190          my %targetGenomes = map { $_ => 1 } $self->GetGenomes('target');          my %targetGenomes = map { $_ => 1 } $self->GetGenomes('target');
191            $self->PrintLine("Gathering the exclusion genomes.  ");
192          my %exclusionGenomes = map { $_ => 1 } $self->GetGenomes('exclusion');          my %exclusionGenomes = map { $_ => 1 } $self->GetGenomes('exclusion');
193            $self->PrintLine("Validating the genome sets.<br />");
194          # Insure the given genome is not in the exclusion set.          # Insure the given genome is not in the exclusion set.
195          if ($exclusionGenomes{$givenGenomeID}) {          if ($exclusionGenomes{$givenGenomeID}) {
196              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");
# Line 190  Line 206 
206          $retVal = 0;          $retVal = 0;
207          # Compute the list of genomes of interest.          # Compute the list of genomes of interest.
208          my @allGenomes = (keys %exclusionGenomes, keys %targetGenomes);          my @allGenomes = (keys %exclusionGenomes, keys %targetGenomes);
209          # Get the BBH matrix.          # Set the parameter that indicates whether or not we're in PEGs-only mode.
210            my $pegMode = ($pegsOnly ? 'peg' : undef);
211            # Get the peg matrix.
212            Trace("Requesting matrix.") if T(3);
213          $saveTime = time();          $saveTime = time();
214          my %bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);          my %bbhMatrix;
215            if (! $cgi->param('useSims')) {
216                # Here we are using BBHs, which are fast enough to do in one gulp.
217                $self->PrintLine("Requesting bidirectional best hits.  ");
218                %bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);
219            } else {
220                # Here we are using similarities, which is much more complicated.
221                $self->PrintLine("Requesting similarities.<br />");
222                # Create a filtering matrix for the results. We only want to keep PEGs in the
223                # specified target and exclusion genomes.
224                my %keepGenomes = map { $_ => 1 } @allGenomes;
225                # Loop through the given genome's features.
226                my @features = $sprout->FeaturesOf($givenGenomeID, $pegMode);
227                for my $fid (@features) {
228                    $self->PrintLine("Retrieving similarities for $fid.  ");
229                    # Get this feature's similarities.
230                    my $simList = $sprout->Sims($fid, 1000, $cutoff, 'fig');
231                    my $simCount = scalar @{$simList};
232                    $self->PrintLine("Raw similarity count: $simCount.  ");
233                    # Create the matrix hash for this feature.
234                    $bbhMatrix{$fid} = {};
235                    # Now we need to filter out the similarities that don't land on the target genome.
236                    $simCount = 0;
237                    for my $sim (@{$simList}) {
238                        # Insure this similarity lands on a target genome.
239                        my ($genomeID2) = FIGRules::ParseFeatureID($sim->id2);
240                        if ($keepGenomes{$genomeID2}) {
241                            # Here we're keeping the similarity, so we put it in this feature's hash.
242                            $bbhMatrix{$fid}->{$sim->id2} = $sim->psc;
243                            $simCount++;
244                        }
245                    }
246                    $self->PrintLine("Similarities retained: $simCount.<br />");
247                }
248            }
249          $bbhTimer += time() - $saveTime;          $bbhTimer += time() - $saveTime;
250            $self->PrintLine("Time to build matrix: $bbhTimer seconds.<br />");
251            Trace("Matrix built.") if T(3);
252          # Create a feature query object to loop through the chosen features of the given          # Create a feature query object to loop through the chosen features of the given
253          # genome.          # genome.
254          Trace("Creating feature query.") if T(3);          Trace("Creating feature query.") if T(3);
# Line 215  Line 270 
270              } else {              } else {
271                  # Get the feature's ID.                  # Get the feature's ID.
272                  my $fid = $fquery->FID();                  my $fid = $fquery->FID();
273                  Trace("Processing feature $fid.") if T(4);                  # Insure we want to look at this feature.
274                    if ($fid =~ /\.peg\./ || ! $pegsOnly) {
275                        Trace("Checking feature $fid.") if T(4);
276                        $self->PrintLine("Checking feature $fid.<br />");
277                  # Get its list of BBHs. The list is actually a hash mapping each BBH to its                  # Get its list of BBHs. The list is actually a hash mapping each BBH to its
278                  # score. All we care about, however, are the BBHs themselves.                  # score. All we care about, however, are the BBHs themselves.
279                  my $bbhList = $bbhMatrix{$fid};                  my $bbhList = $bbhMatrix{$fid};
# Line 232  Line 290 
290                  # Denote that we're in our own genome.                  # Denote that we're in our own genome.
291                  $alreadySeen{$givenGenomeID} = 0;                  $alreadySeen{$givenGenomeID} = 0;
292                  my $targetCount = 1;                  my $targetCount = 1;
293                  # Loop through the BBHs.                      # Loop through the BBHs/Sims.
294                  for my $bbhPeg (keys %{$bbhList}) {                  for my $bbhPeg (keys %{$bbhList}) {
295                      # Get the genome ID. We want to find out if this genome is new.                      # Get the genome ID. We want to find out if this genome is new.
296                      my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg);                      my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg);
# Line 272  Line 330 
330                      # If the two differentials are greater than one, we keep this feature.                      # If the two differentials are greater than one, we keep this feature.
331                      $score = $inD + $outD;                      $score = $inD + $outD;
332                      $okFlag = ($score > 1);                      $okFlag = ($score > 1);
333                            # Subtract 1 from the score so it looks like the commonality score.
334                            $score -= 1.0;
335                  } else {                  } else {
336                      # Check to see if we're common in set 1 and not in set 2.                      # Check to see if we're common in set 1 and not in set 2.
337                      my $score1 = IsCommon($targetCount, $targetSetSize, $commonality);                      my $score1 = IsCommon($targetCount, $targetSetSize, $commonality);
338                      my $score2 = IsCommon($exclusionCount, $exclusionSetSize, $commonality);                      my $score2 = IsCommon($exclusionCount, $exclusionSetSize, $commonality);
339                      if ($score1 && ! $score2) {                      if ($score1 && ! $score2) {
340                          # We satisfy the criterion, so we put this feature to the output. The                          # We satisfy the criterion, so we put this feature to the output. The
341                          # score is normalize to a range similar to the scores in the statistical                              # score is essentially $score1, since $score2 is zero.
342                          # method.                              $score = $score1;
                         $score = $score1 + (1 - $score2);  
343                          $okFlag = 1;                          $okFlag = 1;
344                      }                      }
345                  }                  }
# Line 310  Line 369 
369                  }                  }
370              }              }
371          }          }
372            }
373          # Close the session file.          # Close the session file.
374          $saveTime = time();          $saveTime = time();
375          $self->CloseSession();          $self->CloseSession();
# Line 338  Line 398 
398      return "Search for genes that are common to a group of organisms or that discriminate between two groups of organisms.";      return "Search for genes that are common to a group of organisms or that discriminate between two groups of organisms.";
399  }  }
400    
401    =head3 SortKey
402    
403    C<< my $key = $shelp->SortKey($fdata); >>
404    
405    Return the sort key for the specified feature data. The default is to sort by feature name,
406    floating NMPDR organisms to the top. If a full-text search is used, then the default
407    sort is by relevance followed by feature name. This sort may be overridden by the
408    search class to provide fancier functionality. This method is called by
409    B<PutFeature>, so it is only used for feature searches. A non-feature search
410    would presumably have its own sort logic.
411    
412    =over 4
413    
414    =item record
415    
416    The C<FeatureData> containing the current feature.
417    
418    =item RETURN
419    
420    Returns a key field that can be used to sort this row in among the results.
421    
422    =back
423    
424    =cut
425    
426    sub SortKey {
427        # Get the parameters.
428        my ($self, $fdata) = @_;
429        # Get the score.
430        my $retVal = $fdata->GetExtraColumn('score');
431        # Invert it to create a sort with the high score first.
432        $retVal = sprintf("%0.3f", 1 - $retVal);
433        Trace("Sort key for " . $fdata->FID() . " is $retVal.") if T(4);
434        # Return the result.
435        return $retVal;
436    }
437    
438    
439    
440  =head2 Internal Utilities  =head2 Internal Utilities
441    
442  =head3 IsCommon  =head3 IsCommon

Legend:
Removed from v.1.11  
changed lines
  Added in v.1.15

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3