[Bio] / Sprout / SHSigGenes.pm Repository:
ViewVC logotype

Diff of /Sprout/SHSigGenes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.15, Thu May 17 23:44:21 2007 UTC revision 1.17, Mon Aug 20 23:25:32 2007 UTC
# Line 4  Line 4 
4    
5      use strict;      use strict;
6      use Tracer;      use Tracer;
     use SearchHelper;  
7      use CGI;      use CGI;
8      use HTML;      use HTML;
9      use Sprout;      use Sprout;
10      use Time::HiRes;      use Time::HiRes;
11      use FIGRules;      use FIGRules;
12        use RHFeatures;
13      our @ISA = qw(SearchHelper);      use base 'SearchHelper';
14    
15  =head1 Gene Discrimination Feature Search Helper  =head1 Gene Discrimination Feature Search Helper
16    
# Line 116  Line 115 
115                                    $cgi->checkbox(-name => 'statistical',                                    $cgi->checkbox(-name => 'statistical',
116                                                   -checked => $statistical,                                                   -checked => $statistical,
117                                                   -value => 1,                                                   -value => 1,
118                                                   -label => 'Use Statistical Algorithm'),                                                   -label => 'Use Statistical Algorithm') .
119                                      SearchHelper::Hint("SigGenes",
120                                                         "When two sets of genomees are specified, check this " .
121                                                         "box to use a statistical algorithm designed " .
122                                                         "specifically to choose differentiating genes. " .
123                                                         "This box has no effect when looking for genes " .
124                                                         "in common."),
125                                    $cgi->checkbox(-name => 'useSims',                                    $cgi->checkbox(-name => 'useSims',
126                                                   -checked => $useSims,                                                   -checked => $useSims,
127                                                   -value => 1,                                                   -value => 1,
128                                                   -label => 'Use Similarities')))),                                                   -label => 'Use Similarities') .
129                                      SearchHelper::Hint("SigGenes",
130                                                         "Normally, Bidirectional Best Hits are used to " .
131                                                         "find matching genes. Check this box to use " .
132                                                         "similarities instead.")))),
133                  $cgi->Tr($cgi->td(), $cgi->td(join(" ",                  $cgi->Tr($cgi->td(), $cgi->td(join(" ",
134                                    $cgi->checkbox(-name => 'showMatch',                                    $cgi->checkbox(-name => 'showMatch',
135                                                   -checked => $showMatch,                                                   -checked => $showMatch,
136                                                   -value => 1,                                                   -value => 1,
137                                                   -label => 'Show Matching Genes'),                                                   -label => 'Show Matching Genes') .
138  #                                  $cgi->checkbox(-name => 'pegsOnly',                                    SearchHelper::Hint("SigGenes",
139  #                                                 -checked => $pegsOnly,                                                       "Check this button to display the genes matching " .
140  #                                                 -value => 1,                                                       "each gene displayed in the results.")))),
 #                                                 -label => 'PEGs Only')  
                                   ))),  
141                  $cgi->Tr($cgi->td("Cutoff"),                  $cgi->Tr($cgi->td("Cutoff"),
142                           $cgi->td($cgi->textfield(-name => 'cutoff',                           $cgi->td($cgi->textfield(-name => 'cutoff',
143                                                    -value => $cutoff,                                                    -value => $cutoff,
144                                                    -size => 5)));                                                    -size => 5)));
145      # Next, the feature filter rows.      # Next, the feature filter rows.
146      push @rows, $self->FeatureFilterRows();      push @rows, RHFeatures::WordSearchRow($self);
147        push @rows, RHFeatures::FeatureFilterFormRows($self);
148      # Finally, the submit button.      # Finally, the submit button.
149      push @rows, $self->SubmitRow();      push @rows, $self->SubmitRow();
150      # Create the table.      # Create the table.
# Line 167  Line 175 
175      # Declare the return variable. If it remains undefined, the caller will      # Declare the return variable. If it remains undefined, the caller will
176      # assume there was an error.      # assume there was an error.
177      my $retVal;      my $retVal;
     # Denote the extra columns go at the end.  
     $self->SetExtraPos('score');  
178      # Create the timers.      # Create the timers.
179      my ($saveTime, $loopCounter, $bbhTimer, $putTimer, $queryTimer) = (0, 0, 0, 0, 0);      my ($saveTime, $loopCounter, $bbhTimer, $putTimer, $queryTimer) = (0, 0, 0, 0, 0);
180      # Validate the numeric parameters.      # Validate the numeric parameters.
181      my $commonality = $cgi->param('commonality');      my $commonality = $cgi->param('commonality');
182      my $cutoff = $cgi->param('cutoff');      my $cutoff = $cgi->param('cutoff');
     my $pegsOnly = $cgi->param('pegsOnly') || 1;  
183      if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {      if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {
184          $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");          $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");
185      } elsif ($commonality <= 0 || $commonality > 1) {      } elsif ($commonality <= 0 || $commonality > 1) {
# Line 184  Line 189 
189      } elsif ($cutoff > 1) {      } elsif ($cutoff > 1) {
190          $self->SetMessage("Cutoff cannot be greater than 1.");          $self->SetMessage("Cutoff cannot be greater than 1.");
191      } else {      } else {
192            # Get the result helper.
193            my $rhelp = RHFeatures->new($self);
194            # Set up the default columns.
195            $self->DefaultColumns($rhelp);
196            # Add the score at the end.
197            $rhelp->AddExtraColumn(score => undef, title => 'Score', style => 'rightAlign', download => 'num');
198            # Find out if we need to show matching genes.
199            my $showMatch = $cgi->param('showMatch') || 0;
200            # If we do, add a column for them at the front.
201            if ($showMatch) {
202                $rhelp->AddExtraColumn(matches => 0, title => 'Matches', style => 'leftAlign', download => 'list');
203            }
204            # Only proceed if the filtering parameters are valid.
205            if ($rhelp->Valid()) {
206                # Start the output session.
207                $self->OpenSession($rhelp);
208          # Now we need to gather and validate the genome sets.          # Now we need to gather and validate the genome sets.
209          $self->PrintLine("Gathering the target genomes.  ");          $self->PrintLine("Gathering the target genomes.  ");
210          my ($givenGenomeID) = $self->GetGenomes('given');          my ($givenGenomeID) = $self->GetGenomes('given');
# Line 196  Line 217 
217              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");
218          } else {          } else {
219              # Insure the given genome is in the target set.              # Insure the given genome is in the target set.
220              $targetGenomes{$givenGenomeID} = 1                  $targetGenomes{$givenGenomeID} = 1;
221          }          }
222          # Find out if we want to use a statistical analysis.          # Find out if we want to use a statistical analysis.
223          my $statistical = $cgi->param('statistical') || 1;          my $statistical = $cgi->param('statistical') || 1;
         # Find out if we need to show matching genes.  
         my $showMatch = $cgi->param('showMatch') || 0;  
224          # Denote we have not yet found any genomes.          # Denote we have not yet found any genomes.
225          $retVal = 0;          $retVal = 0;
226          # Compute the list of genomes of interest.          # Compute the list of genomes of interest.
227          my @allGenomes = (keys %exclusionGenomes, keys %targetGenomes);          my @allGenomes = (keys %exclusionGenomes, keys %targetGenomes);
         # Set the parameter that indicates whether or not we're in PEGs-only mode.  
         my $pegMode = ($pegsOnly ? 'peg' : undef);  
228          # Get the peg matrix.          # Get the peg matrix.
229          Trace("Requesting matrix.") if T(3);          Trace("Requesting matrix.") if T(3);
230          $saveTime = time();          $saveTime = time();
# Line 217  Line 234 
234              $self->PrintLine("Requesting bidirectional best hits.  ");              $self->PrintLine("Requesting bidirectional best hits.  ");
235              %bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);              %bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);
236          } else {          } else {
237              # Here we are using similarities, which is much more complicated.                  # Here we are using similarities, which are much more complicated.
238              $self->PrintLine("Requesting similarities.<br />");              $self->PrintLine("Requesting similarities.<br />");
239              # Create a filtering matrix for the results. We only want to keep PEGs in the              # Create a filtering matrix for the results. We only want to keep PEGs in the
240              # specified target and exclusion genomes.              # specified target and exclusion genomes.
241              my %keepGenomes = map { $_ => 1 } @allGenomes;              my %keepGenomes = map { $_ => 1 } @allGenomes;
242              # Loop through the given genome's features.              # Loop through the given genome's features.
243              my @features = $sprout->FeaturesOf($givenGenomeID, $pegMode);                  my @features = $sprout->FeaturesOf($givenGenomeID);
244              for my $fid (@features) {              for my $fid (@features) {
245                  $self->PrintLine("Retrieving similarities for $fid.  ");                  $self->PrintLine("Retrieving similarities for $fid.  ");
246                  # Get this feature's similarities.                  # Get this feature's similarities.
# Line 236  Line 253 
253                  $simCount = 0;                  $simCount = 0;
254                  for my $sim (@{$simList}) {                  for my $sim (@{$simList}) {
255                      # Insure this similarity lands on a target genome.                      # Insure this similarity lands on a target genome.
256                      my ($genomeID2) = FIGRules::ParseFeatureID($sim->id2);                          my $genomeID2 = $sprout->GenomeOf($sim->id2);
257                      if ($keepGenomes{$genomeID2}) {                      if ($keepGenomes{$genomeID2}) {
258                          # Here we're keeping the similarity, so we put it in this feature's hash.                          # Here we're keeping the similarity, so we put it in this feature's hash.
259                          $bbhMatrix{$fid}->{$sim->id2} = $sim->psc;                          $bbhMatrix{$fid}->{$sim->id2} = $sim->psc;
# Line 253  Line 270 
270          # genome.          # genome.
271          Trace("Creating feature query.") if T(3);          Trace("Creating feature query.") if T(3);
272          $saveTime = time();          $saveTime = time();
273          my $fquery = FeatureQuery->new($self, $givenGenomeID);              my $fquery = $rhelp->GetQuery($givenGenomeID);
274          $queryTimer += time() - $saveTime;          $queryTimer += time() - $saveTime;
275          # Get the sizes of the two sets. This information is useful in computing commonality.          # Get the sizes of the two sets. This information is useful in computing commonality.
276          my $targetSetSize = scalar keys %targetGenomes;          my $targetSetSize = scalar keys %targetGenomes;
# Line 263  Line 280 
280          while (! $done) {          while (! $done) {
281              # Get the next feature.              # Get the next feature.
282              $saveTime = time();              $saveTime = time();
283              my $record = $fquery->Fetch();                  my $record = $rhelp->Fetch($fquery);
284              $queryTimer += time() - $saveTime;              $queryTimer += time() - $saveTime;
285              if (! $record) {              if (! $record) {
286                  $done = 1;                  $done = 1;
287              } else {              } else {
288                  # Get the feature's ID.                  # Get the feature's ID.
289                  my $fid = $fquery->FID();                      my $fid = $record->PrimaryValue('Feature(id)');
                 # Insure we want to look at this feature.  
                 if ($fid =~ /\.peg\./ || ! $pegsOnly) {  
290                      Trace("Checking feature $fid.") if T(4);                      Trace("Checking feature $fid.") if T(4);
291                      $self->PrintLine("Checking feature $fid.<br />");                      $self->PrintLine("Checking feature $fid.<br />");
292                      # Get its list of BBHs. The list is actually a hash mapping each BBH to its                      # Get its list of matching genes. The list is actually a hash mapping each matched gene to its
293                      # score. All we care about, however, are the BBHs themselves.                      # score. All we care about, however, are the matches themselves.
294                      my $bbhList = $bbhMatrix{$fid};                      my $bbhList = $bbhMatrix{$fid};
295                      # We next wish to loop through the BBH IDs, counting how many are in each of the                      # We next wish to loop through the BBH IDs, counting how many are in each of the
296                      # sets. If a genome occurs twice, we only want to count the first occurrence, so                      # sets. If a genome occurs twice, we only want to count the first occurrence, so
# Line 293  Line 308 
308                      # Loop through the BBHs/Sims.                      # Loop through the BBHs/Sims.
309                      for my $bbhPeg (keys %{$bbhList}) {                      for my $bbhPeg (keys %{$bbhList}) {
310                          # Get the genome ID. We want to find out if this genome is new.                          # Get the genome ID. We want to find out if this genome is new.
311                          my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg);                          my $genomeID = $sprout->GenomeOf($bbhPeg);
312                          if (! exists $alreadySeen{$genomeID}) {                          if (! exists $alreadySeen{$genomeID}) {
313                              # It's new, so we check to see which set it's in.                              # It's new, so we check to see which set it's in.
314                              if ($targetGenomes{$genomeID}) {                              if ($targetGenomes{$genomeID}) {
# Line 346  Line 361 
361                      if ($okFlag) {                      if ($okFlag) {
362                          # Put this feature to the output. We have one or two extra columns.                          # Put this feature to the output. We have one or two extra columns.
363                          # First we store the score.                          # First we store the score.
364                          $fquery->AddExtraColumns(score => sprintf("%.3f",$score));                          $rhelp->PutExtraColumns(score => sprintf("%0.3f",$score));
365                          # Next we add the list of matching genes, but only if "showMatch" is specified.                          # Next we add the list of matching genes, but only if "showMatch" is specified.
366                          if ($showMatch) {                          if ($showMatch) {
367                              # The matching genes are in the hash "genesMatching".                              # The matching genes are in the hash "genesMatching".
# Line 354  Line 369 
369                              # We need to linkify them.                              # We need to linkify them.
370                              my $genesHTML = join(", ", map { HTML::fid_link($cgi, $_) } @genes);                              my $genesHTML = join(", ", map { HTML::fid_link($cgi, $_) } @genes);
371                              # Now add them as an extra column.                              # Now add them as an extra column.
372                              $fquery->AddExtraColumns(matches => $genesHTML);                              $rhelp->PutExtraColumns(matches => $genesHTML);
373                          }                          }
374                            # Compute a sort key from the feature data and the score.
375                            my $sort = $rhelp->SortKey($record, sprintf("%0.3f", 1 - $score));
376                            # Output the feature.
377                          $saveTime = time();                          $saveTime = time();
378                          $self->PutFeature($fquery);                          $rhelp->PutData($sort, $fid, $record);
379                          $putTimer += time() - $saveTime;                          $putTimer += time() - $saveTime;
380                          # Increase the result count.                          # Increase the result count.
381                          $retVal++;                          $retVal++;
# Line 369  Line 387 
387                      }                      }
388                  }                  }
389              }              }
         }  
390          # Close the session file.          # Close the session file.
391          $saveTime = time();          $saveTime = time();
392          $self->CloseSession();          $self->CloseSession();
393          $putTimer += time() - $saveTime;          $putTimer += time() - $saveTime;
394      }      }
395        }
396      # Trace the timers.      # Trace the timers.
397      Trace("Time spent: Put = $putTimer, Query = $queryTimer, BBH = $bbhTimer.") if T(3);      Trace("Time spent: Put = $putTimer, Query = $queryTimer, BBH = $bbhTimer.") if T(3);
398      # Return the result count.      # Return the result count.
# Line 398  Line 416 
416      return "Search for genes that are common to a group of organisms or that discriminate between two groups of organisms.";      return "Search for genes that are common to a group of organisms or that discriminate between two groups of organisms.";
417  }  }
418    
419  =head3 SortKey  =head3 SearchTitle
   
 C<< my $key = $shelp->SortKey($fdata); >>  
   
 Return the sort key for the specified feature data. The default is to sort by feature name,  
 floating NMPDR organisms to the top. If a full-text search is used, then the default  
 sort is by relevance followed by feature name. This sort may be overridden by the  
 search class to provide fancier functionality. This method is called by  
 B<PutFeature>, so it is only used for feature searches. A non-feature search  
 would presumably have its own sort logic.  
420    
421  =over 4  C<< my $titleHtml = $shelp->SearchTitle(); >>
   
 =item record  
   
 The C<FeatureData> containing the current feature.  
   
 =item RETURN  
   
 Returns a key field that can be used to sort this row in among the results.  
422    
423  =back  Return the display title for this search. The display title appears above the search results.
424    If no result is returned, no title will be displayed. The result should be an html string
425    that can be legally put inside a block tag such as C<h3> or C<p>.
426    
427  =cut  =cut
428    
429  sub SortKey {  sub SearchTitle {
430      # Get the parameters.      # Get the parameters.
431      my ($self, $fdata) = @_;      my ($self) = @_;
432      # Get the score.      # Compute the title. We extract the relevant clues from the query parameters.
433      my $retVal = $fdata->GetExtraColumn('score');      my $cgi = $self->Q();
434      # Invert it to create a sort with the high score first.      my $type = ($cgi->param('useSims') ? "Similarities" : "Bidirectional Best Hits");
435      $retVal = sprintf("%0.3f", 1 - $retVal);      my $style = ($cgi->param('exclusion') ? "Discriminating" : "Common");
436      Trace("Sort key for " . $fdata->FID() . " is $retVal.") if T(4);      my $retVal = "$style Genes using $type";
437      # Return the result.      # Return it.
438      return $retVal;      return $retVal;
439  }  }
440    
   
   
441  =head2 Internal Utilities  =head2 Internal Utilities
442    
443  =head3 IsCommon  =head3 IsCommon

Legend:
Removed from v.1.15  
changed lines
  Added in v.1.17

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3