[Bio] / Sprout / SHSigGenes.pm Repository:
ViewVC logotype

Diff of /Sprout/SHSigGenes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.15, Thu May 17 23:44:21 2007 UTC revision 1.22, Mon Mar 16 00:24:23 2009 UTC
# Line 4  Line 4 
4    
5      use strict;      use strict;
6      use Tracer;      use Tracer;
7      use SearchHelper;      use CGI qw(-nosticky);
     use CGI;  
8      use HTML;      use HTML;
9      use Sprout;      use Sprout;
10      use Time::HiRes;      use Time::HiRes;
11      use FIGRules;      use FIGRules;
12        use RHFeatures;
13      our @ISA = qw(SearchHelper);      use base 'SearchHelper';
14    
15  =head1 Gene Discrimination Feature Search Helper  =head1 Gene Discrimination Feature Search Helper
16    
# Line 68  Line 67 
67    
68  =head3 Form  =head3 Form
69    
70  C<< my $html = $shelp->Form(); >>      my $html = $shelp->Form();
71    
72  Generate the HTML for a form to request a new search.  Generate the HTML for a form to request a new search.
73    
# Line 100  Line 99 
99      # Now we build the table rows.      # Now we build the table rows.
100      my @rows = ();      my @rows = ();
101      # First we have the given genome.      # First we have the given genome.
102      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Reference Genome"),      push @rows, CGI::Tr(CGI::td({valign => "top"}, "Reference Genome"),
103                           $cgi->td({colspan => 2}, $givenMenu));                           CGI::td({colspan => 2}, $givenMenu));
104      # Now show the target and exclusion menus.      # Now show the target and exclusion menus.
105      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Inclusion Genomes (Set 1)"),      push @rows, CGI::Tr(CGI::td({valign => "top"}, "Inclusion Genomes (Set 1)"),
106                           $cgi->td({colspan => 2}, $targetMenu));                           CGI::td({colspan => 2}, $targetMenu));
107      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),      push @rows, CGI::Tr(CGI::td({valign => "top"}, "Exclusion Genomes (Set 2)"),
108                           $cgi->td({colspan => 2}, $excludeMenu));                           CGI::td({colspan => 2}, $excludeMenu));
109      # Next, the tuning parameters.      # Next, the tuning parameters.
110      push @rows, $cgi->Tr($cgi->td("Commonality"),      push @rows, CGI::Tr(CGI::td("Commonality"),
111                           $cgi->td($cgi->textfield(-name => 'commonality',                           CGI::td(CGI::textfield(-name => 'commonality',
112                                                    -value => $commonality,                                                    -value => $commonality,
113                                                    -size => 5))),                                                    -size => 5))),
114                  $cgi->Tr($cgi->td(), $cgi->td(join(" ",                  CGI::Tr(CGI::td(), CGI::td(join(" ",
115                                    $cgi->checkbox(-name => 'statistical',                                    CGI::checkbox(-name => 'statistical',
116                                                   -checked => $statistical,                                                   -checked => $statistical,
117                                                   -value => 1,                                                   -value => 1,
118                                                   -label => 'Use Statistical Algorithm'),                                                   -label => 'Use Statistical Algorithm') .
119                                    $cgi->checkbox(-name => 'useSims',                                    SearchHelper::Hint("SigGenes", 24),
120                                      CGI::checkbox(-name => 'useSims',
121                                                   -checked => $useSims,                                                   -checked => $useSims,
122                                                   -value => 1,                                                   -value => 1,
123                                                   -label => 'Use Similarities')))),                                                   -label => 'Use Similarities') .
124                  $cgi->Tr($cgi->td(), $cgi->td(join(" ",                                    SearchHelper::Hint("SigGenes", 25)))),
125                                    $cgi->checkbox(-name => 'showMatch',                  CGI::Tr(CGI::td(), CGI::td(join(" ",
126                                      CGI::checkbox(-name => 'showMatch',
127                                                   -checked => $showMatch,                                                   -checked => $showMatch,
128                                                   -value => 1,                                                   -value => 1,
129                                                   -label => 'Show Matching Genes'),                                                   -label => 'Show Matching Genes') .
130  #                                  $cgi->checkbox(-name => 'pegsOnly',                                    SearchHelper::Hint("SigGenes", 26)))),
131  #                                                 -checked => $pegsOnly,                  CGI::Tr(CGI::td("Cutoff"),
132  #                                                 -value => 1,                           CGI::td(CGI::textfield(-name => 'cutoff',
 #                                                 -label => 'PEGs Only')  
                                   ))),  
                 $cgi->Tr($cgi->td("Cutoff"),  
                          $cgi->td($cgi->textfield(-name => 'cutoff',  
133                                                    -value => $cutoff,                                                    -value => $cutoff,
134                                                    -size => 5)));                                                    -size => 5)));
135      # Next, the feature filter rows.      # Next, the feature filter rows.
136      push @rows, $self->FeatureFilterRows();      push @rows, RHFeatures::WordSearchRow($self);
137        push @rows, RHFeatures::FeatureFilterFormRows($self);
138      # Finally, the submit button.      # Finally, the submit button.
139      push @rows, $self->SubmitRow();      push @rows, $self->SubmitRow();
140      # Create the table.      # Create the table.
# Line 149  Line 147 
147    
148  =head3 Find  =head3 Find
149    
150  C<< my $resultCount = $shelp->Find(); >>      my $resultCount = $shelp->Find();
151    
152  Conduct a search based on the current CGI query parameters. The search results will  Conduct a search based on the current CGI query parameters. The search results will
153  be written to the session cache file and the number of results will be  be written to the session cache file and the number of results will be
# Line 167  Line 165 
165      # Declare the return variable. If it remains undefined, the caller will      # Declare the return variable. If it remains undefined, the caller will
166      # assume there was an error.      # assume there was an error.
167      my $retVal;      my $retVal;
     # Denote the extra columns go at the end.  
     $self->SetExtraPos('score');  
168      # Create the timers.      # Create the timers.
169      my ($saveTime, $loopCounter, $bbhTimer, $putTimer, $queryTimer) = (0, 0, 0, 0, 0);      my ($saveTime, $loopCounter, $bbhTimer, $putTimer, $queryTimer) = (0, 0, 0, 0, 0);
170      # Validate the numeric parameters.      # Validate the numeric parameters.
171      my $commonality = $cgi->param('commonality');      my $commonality = $cgi->param('commonality');
172      my $cutoff = $cgi->param('cutoff');      my $cutoff = $cgi->param('cutoff');
     my $pegsOnly = $cgi->param('pegsOnly') || 1;  
173      if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {      if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {
174          $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");          $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");
175      } elsif ($commonality <= 0 || $commonality > 1) {      } elsif ($commonality <= 0 || $commonality > 1) {
# Line 184  Line 179 
179      } elsif ($cutoff > 1) {      } elsif ($cutoff > 1) {
180          $self->SetMessage("Cutoff cannot be greater than 1.");          $self->SetMessage("Cutoff cannot be greater than 1.");
181      } else {      } else {
182            # Get the result helper.
183            my $rhelp = RHFeatures->new($self);
184            # Set up the default columns.
185            $self->DefaultColumns($rhelp);
186            # Add the score at the end.
187            $rhelp->AddExtraColumn(score => undef, title => 'Score', style => 'rightAlign', download => 'num');
188            # Find out if we need to show matching genes.
189            my $showMatch = $cgi->param('showMatch') || 0;
190            # If we do, add a column for them at the front.
191            if ($showMatch) {
192                $rhelp->AddExtraColumn(matches => 0, title => 'Matches', style => 'leftAlign', download => 'list');
193            }
194            # Only proceed if the filtering parameters are valid.
195            if ($rhelp->Valid()) {
196          # Now we need to gather and validate the genome sets.          # Now we need to gather and validate the genome sets.
197          $self->PrintLine("Gathering the target genomes.  ");          $self->PrintLine("Gathering the target genomes.  ");
198          my ($givenGenomeID) = $self->GetGenomes('given');          my ($givenGenomeID) = $self->GetGenomes('given');
199                Trace("Given genome is $givenGenomeID.") if T(3);
200          my %targetGenomes = map { $_ => 1 } $self->GetGenomes('target');          my %targetGenomes = map { $_ => 1 } $self->GetGenomes('target');
201                Trace("Target genomes are " . join(", ", sort keys %targetGenomes) . ".") if T(3);
202          $self->PrintLine("Gathering the exclusion genomes.  ");          $self->PrintLine("Gathering the exclusion genomes.  ");
203          my %exclusionGenomes = map { $_ => 1 } $self->GetGenomes('exclusion');          my %exclusionGenomes = map { $_ => 1 } $self->GetGenomes('exclusion');
204                Trace("Exclusion genomes are " . join(", ", sort keys %exclusionGenomes) . ".") if T(3);
205          $self->PrintLine("Validating the genome sets.<br />");          $self->PrintLine("Validating the genome sets.<br />");
206          # Insure the given genome is not in the exclusion set.          # Insure the given genome is not in the exclusion set.
207          if ($exclusionGenomes{$givenGenomeID}) {          if ($exclusionGenomes{$givenGenomeID}) {
208              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");
209          } else {          } else {
210                    # Start the output session.
211                    $self->OpenSession($rhelp);
212              # Insure the given genome is in the target set.              # Insure the given genome is in the target set.
213              $targetGenomes{$givenGenomeID} = 1                  $targetGenomes{$givenGenomeID} = 1;
214          }                  Trace("$givenGenomeID added to target set.") if T(3);
215          # Find out if we want to use a statistical analysis.          # Find out if we want to use a statistical analysis.
216          my $statistical = $cgi->param('statistical') || 1;          my $statistical = $cgi->param('statistical') || 1;
         # Find out if we need to show matching genes.  
         my $showMatch = $cgi->param('showMatch') || 0;  
217          # Denote we have not yet found any genomes.          # Denote we have not yet found any genomes.
218          $retVal = 0;          $retVal = 0;
219          # Compute the list of genomes of interest.          # Compute the list of genomes of interest.
220          my @allGenomes = (keys %exclusionGenomes, keys %targetGenomes);          my @allGenomes = (keys %exclusionGenomes, keys %targetGenomes);
         # Set the parameter that indicates whether or not we're in PEGs-only mode.  
         my $pegMode = ($pegsOnly ? 'peg' : undef);  
221          # Get the peg matrix.          # Get the peg matrix.
222          Trace("Requesting matrix.") if T(3);          Trace("Requesting matrix.") if T(3);
223          $saveTime = time();          $saveTime = time();
224          my %bbhMatrix;                  my $bbhMatrix;
225          if (! $cgi->param('useSims')) {          if (! $cgi->param('useSims')) {
226              # Here we are using BBHs, which are fast enough to do in one gulp.              # Here we are using BBHs, which are fast enough to do in one gulp.
227              $self->PrintLine("Requesting bidirectional best hits.  ");              $self->PrintLine("Requesting bidirectional best hits.  ");
228              %bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);                      $bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);
229          } else {          } else {
230              # Here we are using similarities, which is much more complicated.                      # Here we are using similarities, which are much more complicated.
231              $self->PrintLine("Requesting similarities.<br />");              $self->PrintLine("Requesting similarities.<br />");
232              # Create a filtering matrix for the results. We only want to keep PEGs in the              # Create a filtering matrix for the results. We only want to keep PEGs in the
233              # specified target and exclusion genomes.              # specified target and exclusion genomes.
234              my %keepGenomes = map { $_ => 1 } @allGenomes;              my %keepGenomes = map { $_ => 1 } @allGenomes;
235              # Loop through the given genome's features.              # Loop through the given genome's features.
236              my @features = $sprout->FeaturesOf($givenGenomeID, $pegMode);                      my @features = $sprout->FeaturesOf($givenGenomeID);
237              for my $fid (@features) {              for my $fid (@features) {
238                  $self->PrintLine("Retrieving similarities for $fid.  ");                  $self->PrintLine("Retrieving similarities for $fid.  ");
239                  # Get this feature's similarities.                  # Get this feature's similarities.
# Line 231  Line 241 
241                  my $simCount = scalar @{$simList};                  my $simCount = scalar @{$simList};
242                  $self->PrintLine("Raw similarity count: $simCount.  ");                  $self->PrintLine("Raw similarity count: $simCount.  ");
243                  # Create the matrix hash for this feature.                  # Create the matrix hash for this feature.
244                  $bbhMatrix{$fid} = {};                          $bbhMatrix->{$fid} = {};
245                  # Now we need to filter out the similarities that don't land on the target genome.                  # Now we need to filter out the similarities that don't land on the target genome.
246                  $simCount = 0;                  $simCount = 0;
247                  for my $sim (@{$simList}) {                  for my $sim (@{$simList}) {
248                      # Insure this similarity lands on a target genome.                      # Insure this similarity lands on a target genome.
249                      my ($genomeID2) = FIGRules::ParseFeatureID($sim->id2);                              my $genomeID2 = $sprout->GenomeOf($sim->id2);
250                      if ($keepGenomes{$genomeID2}) {                      if ($keepGenomes{$genomeID2}) {
251                          # Here we're keeping the similarity, so we put it in this feature's hash.                          # Here we're keeping the similarity, so we put it in this feature's hash.
252                          $bbhMatrix{$fid}->{$sim->id2} = $sim->psc;                                  $bbhMatrix->{$fid}->{$sim->id2} = $sim->psc;
253                          $simCount++;                          $simCount++;
254                      }                      }
255                  }                  }
# Line 253  Line 263 
263          # genome.          # genome.
264          Trace("Creating feature query.") if T(3);          Trace("Creating feature query.") if T(3);
265          $saveTime = time();          $saveTime = time();
266          my $fquery = FeatureQuery->new($self, $givenGenomeID);                  my $fquery = $rhelp->GetQuery($givenGenomeID);
267          $queryTimer += time() - $saveTime;          $queryTimer += time() - $saveTime;
268          # Get the sizes of the two sets. This information is useful in computing commonality.          # Get the sizes of the two sets. This information is useful in computing commonality.
269          my $targetSetSize = scalar keys %targetGenomes;          my $targetSetSize = scalar keys %targetGenomes;
# Line 263  Line 273 
273          while (! $done) {          while (! $done) {
274              # Get the next feature.              # Get the next feature.
275              $saveTime = time();              $saveTime = time();
276              my $record = $fquery->Fetch();                      my $record = $rhelp->Fetch($fquery);
277              $queryTimer += time() - $saveTime;              $queryTimer += time() - $saveTime;
278              if (! $record) {              if (! $record) {
279                  $done = 1;                  $done = 1;
280              } else {              } else {
281                  # Get the feature's ID.                  # Get the feature's ID.
282                  my $fid = $fquery->FID();                          my $fid = $record->PrimaryValue('Feature(id)');
                 # Insure we want to look at this feature.  
                 if ($fid =~ /\.peg\./ || ! $pegsOnly) {  
283                      Trace("Checking feature $fid.") if T(4);                      Trace("Checking feature $fid.") if T(4);
284                      $self->PrintLine("Checking feature $fid.<br />");                      $self->PrintLine("Checking feature $fid.<br />");
285                      # Get its list of BBHs. The list is actually a hash mapping each BBH to its                          # Get its list of matching genes. The list is actually a hash mapping each matched gene to its
286                      # score. All we care about, however, are the BBHs themselves.                          # score. All we care about, however, are the matches themselves.
287                      my $bbhList = $bbhMatrix{$fid};                          my $bbhList = $bbhMatrix->{$fid};
288                      # We next wish to loop through the BBH IDs, counting how many are in each of the                      # We next wish to loop through the BBH IDs, counting how many are in each of the
289                      # sets. If a genome occurs twice, we only want to count the first occurrence, so                      # sets. If a genome occurs twice, we only want to count the first occurrence, so
290                      # we have a hash of genomes we've already seen. The hash will map each gene ID                      # we have a hash of genomes we've already seen. The hash will map each gene ID
# Line 293  Line 301 
301                      # Loop through the BBHs/Sims.                      # Loop through the BBHs/Sims.
302                      for my $bbhPeg (keys %{$bbhList}) {                      for my $bbhPeg (keys %{$bbhList}) {
303                          # Get the genome ID. We want to find out if this genome is new.                          # Get the genome ID. We want to find out if this genome is new.
304                          my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg);                              my $genomeID = $sprout->GenomeOf($bbhPeg);
305                          if (! exists $alreadySeen{$genomeID}) {                          if (! exists $alreadySeen{$genomeID}) {
306                              # It's new, so we check to see which set it's in.                              # It's new, so we check to see which set it's in.
307                              if ($targetGenomes{$genomeID}) {                              if ($targetGenomes{$genomeID}) {
# Line 346  Line 354 
354                      if ($okFlag) {                      if ($okFlag) {
355                          # Put this feature to the output. We have one or two extra columns.                          # Put this feature to the output. We have one or two extra columns.
356                          # First we store the score.                          # First we store the score.
357                          $fquery->AddExtraColumns(score => sprintf("%.3f",$score));                              $rhelp->PutExtraColumns(score => sprintf("%0.3f",$score));
358                          # Next we add the list of matching genes, but only if "showMatch" is specified.                          # Next we add the list of matching genes, but only if "showMatch" is specified.
359                          if ($showMatch) {                          if ($showMatch) {
360                              # The matching genes are in the hash "genesMatching".                              # The matching genes are in the hash "genesMatching".
# Line 354  Line 362 
362                              # We need to linkify them.                              # We need to linkify them.
363                              my $genesHTML = join(", ", map { HTML::fid_link($cgi, $_) } @genes);                              my $genesHTML = join(", ", map { HTML::fid_link($cgi, $_) } @genes);
364                              # Now add them as an extra column.                              # Now add them as an extra column.
365                              $fquery->AddExtraColumns(matches => $genesHTML);                                  $rhelp->PutExtraColumns(matches => $genesHTML);
366                          }                          }
367                                # Compute a sort key from the feature data and the score.
368                                my $sort = $rhelp->SortKey($record, sprintf("%0.3f", 1 - $score));
369                                # Output the feature.
370                          $saveTime = time();                          $saveTime = time();
371                          $self->PutFeature($fquery);                              $rhelp->PutData($sort, $fid, $record);
372                          $putTimer += time() - $saveTime;                          $putTimer += time() - $saveTime;
373                          # Increase the result count.                          # Increase the result count.
374                          $retVal++;                          $retVal++;
# Line 369  Line 380 
380                      }                      }
381                  }                  }
382              }              }
         }  
383          # Close the session file.          # Close the session file.
384          $saveTime = time();          $saveTime = time();
385          $self->CloseSession();          $self->CloseSession();
386          $putTimer += time() - $saveTime;          $putTimer += time() - $saveTime;
387      }      }
388            }
389        }
390      # Trace the timers.      # Trace the timers.
391      Trace("Time spent: Put = $putTimer, Query = $queryTimer, BBH = $bbhTimer.") if T(3);      Trace("Time spent: Put = $putTimer, Query = $queryTimer, BBH = $bbhTimer.") if T(3);
392      # Return the result count.      # Return the result count.
# Line 383  Line 395 
395    
396  =head3 Description  =head3 Description
397    
398  C<< my $htmlText = $shelp->Description(); >>      my $htmlText = $shelp->Description();
399    
400  Return a description of this search. The description is used for the table of contents  Return a description of this search. The description is used for the table of contents
401  on the main search tools page. It may contain HTML, but it should be character-level,  on the main search tools page. It may contain HTML, but it should be character-level,
# Line 398  Line 410 
410      return "Search for genes that are common to a group of organisms or that discriminate between two groups of organisms.";      return "Search for genes that are common to a group of organisms or that discriminate between two groups of organisms.";
411  }  }
412    
413  =head3 SortKey  =head3 SearchTitle
   
 C<< my $key = $shelp->SortKey($fdata); >>  
414    
415  Return the sort key for the specified feature data. The default is to sort by feature name,      my $titleHtml = $shelp->SearchTitle();
 floating NMPDR organisms to the top. If a full-text search is used, then the default  
 sort is by relevance followed by feature name. This sort may be overridden by the  
 search class to provide fancier functionality. This method is called by  
 B<PutFeature>, so it is only used for feature searches. A non-feature search  
 would presumably have its own sort logic.  
416    
417  =over 4  Return the display title for this search. The display title appears above the search results.
418    If no result is returned, no title will be displayed. The result should be an html string
419  =item record  that can be legally put inside a block tag such as C<h3> or C<p>.
   
 The C<FeatureData> containing the current feature.  
   
 =item RETURN  
   
 Returns a key field that can be used to sort this row in among the results.  
   
 =back  
420    
421  =cut  =cut
422    
423  sub SortKey {  sub SearchTitle {
424      # Get the parameters.      # Get the parameters.
425      my ($self, $fdata) = @_;      my ($self) = @_;
426      # Get the score.      # Compute the title. We extract the relevant clues from the query parameters.
427      my $retVal = $fdata->GetExtraColumn('score');      my $cgi = $self->Q();
428      # Invert it to create a sort with the high score first.      my $type = ($cgi->param('useSims') ? "Similarities" : "Bidirectional Best Hits");
429      $retVal = sprintf("%0.3f", 1 - $retVal);      my $style = ($cgi->param('exclusion') ? "Discriminating" : "Common");
430      Trace("Sort key for " . $fdata->FID() . " is $retVal.") if T(4);      my $retVal = "$style Genes using $type";
431      # Return the result.      # Return it.
432      return $retVal;      return $retVal;
433  }  }
434    
   
   
435  =head2 Internal Utilities  =head2 Internal Utilities
436    
437  =head3 IsCommon  =head3 IsCommon
438    
439  C<< my $score = SHSigGenes::IsCommon($count, $size, $commonality); >>      my $score = SHSigGenes::IsCommon($count, $size, $commonality);
440    
441  Return the match score if a specified count indicates a gene is common in a specified set  Return the match score if a specified count indicates a gene is common in a specified set
442  and 0 otherwise. Commonality is computed by dividing the count by the size of the set and  and 0 otherwise. Commonality is computed by dividing the count by the size of the set and

Legend:
Removed from v.1.15  
changed lines
  Added in v.1.22

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3