[Bio] / Sprout / SHSigGenes.pm Repository:
ViewVC logotype

Diff of /Sprout/SHSigGenes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.12, Tue Apr 10 06:10:02 2007 UTC revision 1.16, Mon Jul 16 20:04:51 2007 UTC
# Line 4  Line 4 
4    
5      use strict;      use strict;
6      use Tracer;      use Tracer;
     use SearchHelper;  
7      use CGI;      use CGI;
8      use HTML;      use HTML;
9      use Sprout;      use Sprout;
10      use Time::HiRes;      use Time::HiRes;
11      use FIGRules;      use FIGRules;
12        use RHFeatures;
13      our @ISA = qw(SearchHelper);      use base 'SearchHelper';
14    
15  =head1 Gene Discrimination Feature Search Helper  =head1 Gene Discrimination Feature Search Helper
16    
# Line 96  Line 95 
95      my $statistical = $cgi->param('statistical') || 1;      my $statistical = $cgi->param('statistical') || 1;
96      my $showMatch = $cgi->param('showMatch') || 0;      my $showMatch = $cgi->param('showMatch') || 0;
97      my $useSims = $cgi->param('useSims') || 0;      my $useSims = $cgi->param('useSims') || 0;
98      my $pegsOnly = $cgi->param('pegsOnly') || 0;      my $pegsOnly = $cgi->param('pegsOnly') || 1;
99      # Now we build the table rows.      # Now we build the table rows.
100      my @rows = ();      my @rows = ();
101      # First we have the given genome.      # First we have the given genome.
# Line 116  Line 115 
115                                    $cgi->checkbox(-name => 'statistical',                                    $cgi->checkbox(-name => 'statistical',
116                                                   -checked => $statistical,                                                   -checked => $statistical,
117                                                   -value => 1,                                                   -value => 1,
118                                                   -label => 'Use Statistical Algorithm'),                                                   -label => 'Use Statistical Algorithm') .
119                                      SearchHelper::Hint("When two sets of genomees are specified, check this " .
120                                                         "box to use a statistical algorithm designed " .
121                                                         "specifically to choose differentiating genes. " .
122                                                         "This box has no effect when looking for genes " .
123                                                         "in common."),
124                                    $cgi->checkbox(-name => 'useSims',                                    $cgi->checkbox(-name => 'useSims',
125                                                   -checked => $useSims,                                                   -checked => $useSims,
126                                                   -value => 1,                                                   -value => 1,
127                                                   -label => 'Use Similarities')))),                                                   -label => 'Use Similarities') .
128                                      SearchHelper::Hint("Normally, Bidirectional Best Hits are used to " .
129                                                         "find matching genes. Check this box to use " .
130                                                         "similarities instead.")))),
131                  $cgi->Tr($cgi->td(), $cgi->td(join(" ",                  $cgi->Tr($cgi->td(), $cgi->td(join(" ",
132                                    $cgi->checkbox(-name => 'showMatch',                                    $cgi->checkbox(-name => 'showMatch',
133                                                   -checked => $showMatch,                                                   -checked => $showMatch,
134                                                   -value => 1,                                                   -value => 1,
135                                                   -label => 'Show Matching Genes'),                                                   -label => 'Show Matching Genes') .
136                                    $cgi->checkbox(-name => 'pegsOnly',                                    SearchHelper::Hint("Check this button to display the genes matching " .
137                                                   -checked => $pegsOnly,                                                       "each gene displayed in the results.")))),
                                                  -value => 1,  
                                                  -label => 'PEGs Only')))),  
138                  $cgi->Tr($cgi->td("Cutoff"),                  $cgi->Tr($cgi->td("Cutoff"),
139                           $cgi->td($cgi->textfield(-name => 'cutoff',                           $cgi->td($cgi->textfield(-name => 'cutoff',
140                                                    -value => $cutoff,                                                    -value => $cutoff,
141                                                    -size => 5)));                                                    -size => 5)));
142      # Next, the feature filter rows.      # Next, the feature filter rows.
143      push @rows, $self->FeatureFilterRows();      push @rows, RHFeatures::WordSearchRow($self);
144        push @rows, RHFeatures::FeatureFilterFormRows($self);
145      # Finally, the submit button.      # Finally, the submit button.
146      push @rows, $self->SubmitRow();      push @rows, $self->SubmitRow();
147      # Create the table.      # Create the table.
# Line 166  Line 172 
172      # Declare the return variable. If it remains undefined, the caller will      # Declare the return variable. If it remains undefined, the caller will
173      # assume there was an error.      # assume there was an error.
174      my $retVal;      my $retVal;
     # Denote the extra columns go at the end.  
     $self->SetExtraPos(1);  
175      # Create the timers.      # Create the timers.
176      my ($saveTime, $loopCounter, $bbhTimer, $putTimer, $queryTimer) = (0, 0, 0, 0, 0);      my ($saveTime, $loopCounter, $bbhTimer, $putTimer, $queryTimer) = (0, 0, 0, 0, 0);
177      # Validate the numeric parameters.      # Validate the numeric parameters.
178      my $commonality = $cgi->param('commonality');      my $commonality = $cgi->param('commonality');
179      my $cutoff = $cgi->param('cutoff');      my $cutoff = $cgi->param('cutoff');
     my $pegsOnly = $cgi->param('pegsOnly');  
180      if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {      if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {
181          $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");          $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");
182      } elsif ($commonality <= 0 || $commonality > 1) {      } elsif ($commonality <= 0 || $commonality > 1) {
# Line 183  Line 186 
186      } elsif ($cutoff > 1) {      } elsif ($cutoff > 1) {
187          $self->SetMessage("Cutoff cannot be greater than 1.");          $self->SetMessage("Cutoff cannot be greater than 1.");
188      } else {      } else {
189            # Get the result helper.
190            my $rhelp = RHFeatures->new($self);
191            # Set up the default columns.
192            $self->DefaultColumns($rhelp);
193            # Add the score at the end.
194            $rhelp->AddExtraColumn(score => undef, title => 'Score', style => 'rightAlign', download => 'num');
195            # Find out if we need to show matching genes.
196            my $showMatch = $cgi->param('showMatch') || 0;
197            # If we do, add a column for them at the front.
198            if ($showMatch) {
199                $rhelp->AddExtraColumn(matches => 0, title => 'Matches', style => 'leftAlign', download => 'list');
200            }
201            # Only proceed if the filtering parameters are valid.
202            if ($rhelp->Valid()) {
203                # Start the output session.
204                $self->OpenSession($rhelp);
205          # Now we need to gather and validate the genome sets.          # Now we need to gather and validate the genome sets.
206          $self->PrintLine("Gathering the target genomes.  ");          $self->PrintLine("Gathering the target genomes.  ");
207          my ($givenGenomeID) = $self->GetGenomes('given');          my ($givenGenomeID) = $self->GetGenomes('given');
# Line 195  Line 214 
214              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");
215          } else {          } else {
216              # Insure the given genome is in the target set.              # Insure the given genome is in the target set.
217              $targetGenomes{$givenGenomeID} = 1                  $targetGenomes{$givenGenomeID} = 1;
218          }          }
219          # Find out if we want to use a statistical analysis.          # Find out if we want to use a statistical analysis.
220          my $statistical = $cgi->param('statistical') || 1;          my $statistical = $cgi->param('statistical') || 1;
         # Find out if we need to show matching genes.  
         my $showMatch = $cgi->param('showMatch') || 0;  
221          # Denote we have not yet found any genomes.          # Denote we have not yet found any genomes.
222          $retVal = 0;          $retVal = 0;
223          # Compute the list of genomes of interest.          # Compute the list of genomes of interest.
224          my @allGenomes = (keys %exclusionGenomes, keys %targetGenomes);          my @allGenomes = (keys %exclusionGenomes, keys %targetGenomes);
         # Set the parameter that indicates whether or not we're in PEGs-only mode.  
         my $pegMode = ($pegsOnly ? 'peg' : undef);  
225          # Get the peg matrix.          # Get the peg matrix.
226          Trace("Requesting matrix.") if T(3);          Trace("Requesting matrix.") if T(3);
227          $saveTime = time();          $saveTime = time();
# Line 216  Line 231 
231              $self->PrintLine("Requesting bidirectional best hits.  ");              $self->PrintLine("Requesting bidirectional best hits.  ");
232              %bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);              %bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);
233          } else {          } else {
234              # Here we are using similarities, which is much more complicated.                  # Here we are using similarities, which are much more complicated.
235              $self->PrintLine("Requesting similarities.<br />");              $self->PrintLine("Requesting similarities.<br />");
236              # Create a filtering matrix for the results. We only want to keep PEGs in the              # Create a filtering matrix for the results. We only want to keep PEGs in the
237              # specified target and exclusion genomes.              # specified target and exclusion genomes.
238              my %keepGenomes = map { $_ => 1 } @allGenomes;              my %keepGenomes = map { $_ => 1 } @allGenomes;
239              # Loop through the given genome's features.              # Loop through the given genome's features.
240              my @features = $sprout->FeaturesOf($givenGenomeID, $pegMode);                  my @features = $sprout->FeaturesOf($givenGenomeID);
241              for my $fid (@features) {              for my $fid (@features) {
242                  $self->PrintLine("Retrieving similarities for $fid.  ");                  $self->PrintLine("Retrieving similarities for $fid.  ");
243                  # Get this feature's similarities.                  # Get this feature's similarities.
# Line 235  Line 250 
250                  $simCount = 0;                  $simCount = 0;
251                  for my $sim (@{$simList}) {                  for my $sim (@{$simList}) {
252                      # Insure this similarity lands on a target genome.                      # Insure this similarity lands on a target genome.
253                      my ($genomeID2) = FIGRules::ParseFeatureID($sim->id2);                          my $genomeID2 = $sprout->GenomeOf($sim->id2);
254                      if ($keepGenomes{$genomeID2}) {                      if ($keepGenomes{$genomeID2}) {
255                          # Here we're keeping the similarity, so we put it in this feature's hash.                          # Here we're keeping the similarity, so we put it in this feature's hash.
256                          $bbhMatrix{$fid}->{$sim->id2} = $sim->psc;                          $bbhMatrix{$fid}->{$sim->id2} = $sim->psc;
# Line 252  Line 267 
267          # genome.          # genome.
268          Trace("Creating feature query.") if T(3);          Trace("Creating feature query.") if T(3);
269          $saveTime = time();          $saveTime = time();
270          my $fquery = FeatureQuery->new($self, $givenGenomeID);              my $fquery = $rhelp->GetQuery($givenGenomeID);
271          $queryTimer += time() - $saveTime;          $queryTimer += time() - $saveTime;
272          # Get the sizes of the two sets. This information is useful in computing commonality.          # Get the sizes of the two sets. This information is useful in computing commonality.
273          my $targetSetSize = scalar keys %targetGenomes;          my $targetSetSize = scalar keys %targetGenomes;
# Line 262  Line 277 
277          while (! $done) {          while (! $done) {
278              # Get the next feature.              # Get the next feature.
279              $saveTime = time();              $saveTime = time();
280              my $record = $fquery->Fetch();                  my $record = $rhelp->Fetch($fquery);
281              $queryTimer += time() - $saveTime;              $queryTimer += time() - $saveTime;
282              if (! $record) {              if (! $record) {
283                  $done = 1;                  $done = 1;
284              } else {              } else {
285                  # Get the feature's ID.                  # Get the feature's ID.
286                  my $fid = $fquery->FID();                      my $fid = $record->PrimaryValue('Feature(id)');
                 # Insure we want to look at this feature.  
                 if ($fid =~ /\.peg\./ || ! $pegsOnly) {  
287                      Trace("Checking feature $fid.") if T(4);                      Trace("Checking feature $fid.") if T(4);
288                      $self->PrintLine("Checking feature $fid.<br />");                      $self->PrintLine("Checking feature $fid.<br />");
289                      # Get its list of BBHs. The list is actually a hash mapping each BBH to its                      # Get its list of matching genes. The list is actually a hash mapping each matched gene to its
290                      # score. All we care about, however, are the BBHs themselves.                      # score. All we care about, however, are the matches themselves.
291                      my $bbhList = $bbhMatrix{$fid};                      my $bbhList = $bbhMatrix{$fid};
292                      # We next wish to loop through the BBH IDs, counting how many are in each of the                      # We next wish to loop through the BBH IDs, counting how many are in each of the
293                      # sets. If a genome occurs twice, we only want to count the first occurrence, so                      # sets. If a genome occurs twice, we only want to count the first occurrence, so
# Line 292  Line 305 
305                      # Loop through the BBHs/Sims.                      # Loop through the BBHs/Sims.
306                      for my $bbhPeg (keys %{$bbhList}) {                      for my $bbhPeg (keys %{$bbhList}) {
307                          # Get the genome ID. We want to find out if this genome is new.                          # Get the genome ID. We want to find out if this genome is new.
308                          my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg);                          my $genomeID = $sprout->GenomeOf($bbhPeg);
309                          if (! exists $alreadySeen{$genomeID}) {                          if (! exists $alreadySeen{$genomeID}) {
310                              # It's new, so we check to see which set it's in.                              # It's new, so we check to see which set it's in.
311                              if ($targetGenomes{$genomeID}) {                              if ($targetGenomes{$genomeID}) {
# Line 345  Line 358 
358                      if ($okFlag) {                      if ($okFlag) {
359                          # Put this feature to the output. We have one or two extra columns.                          # Put this feature to the output. We have one or two extra columns.
360                          # First we store the score.                          # First we store the score.
361                          $fquery->AddExtraColumns(score => sprintf("%.3f",$score));                          $rhelp->PutExtraColumns(score => sprintf("%0.3f",$score));
362                          # Next we add the list of matching genes, but only if "showMatch" is specified.                          # Next we add the list of matching genes, but only if "showMatch" is specified.
363                          if ($showMatch) {                          if ($showMatch) {
364                              # The matching genes are in the hash "genesMatching".                              # The matching genes are in the hash "genesMatching".
# Line 353  Line 366 
366                              # We need to linkify them.                              # We need to linkify them.
367                              my $genesHTML = join(", ", map { HTML::fid_link($cgi, $_) } @genes);                              my $genesHTML = join(", ", map { HTML::fid_link($cgi, $_) } @genes);
368                              # Now add them as an extra column.                              # Now add them as an extra column.
369                              $fquery->AddExtraColumns(matches => $genesHTML);                              $rhelp->PutExtraColumns(matches => $genesHTML);
370                          }                          }
371                            # Compute a sort key from the feature data and the score.
372                            my $sort = $rhelp->SortKey($record, sprintf("%0.3f", 1 - $score));
373                            # Output the feature.
374                          $saveTime = time();                          $saveTime = time();
375                          $self->PutFeature($fquery);                          $rhelp->PutData($sort, $fid, $record);
376                          $putTimer += time() - $saveTime;                          $putTimer += time() - $saveTime;
377                          # Increase the result count.                          # Increase the result count.
378                          $retVal++;                          $retVal++;
# Line 368  Line 384 
384                      }                      }
385                  }                  }
386              }              }
         }  
387          # Close the session file.          # Close the session file.
388          $saveTime = time();          $saveTime = time();
389          $self->CloseSession();          $self->CloseSession();
390          $putTimer += time() - $saveTime;          $putTimer += time() - $saveTime;
391      }      }
392        }
393      # Trace the timers.      # Trace the timers.
394      Trace("Time spent: Put = $putTimer, Query = $queryTimer, BBH = $bbhTimer.") if T(3);      Trace("Time spent: Put = $putTimer, Query = $queryTimer, BBH = $bbhTimer.") if T(3);
395      # Return the result count.      # Return the result count.
# Line 397  Line 413 
413      return "Search for genes that are common to a group of organisms or that discriminate between two groups of organisms.";      return "Search for genes that are common to a group of organisms or that discriminate between two groups of organisms.";
414  }  }
415    
416    =head3 SearchTitle
417    
418    C<< my $titleHtml = $shelp->SearchTitle(); >>
419    
420    Return the display title for this search. The display title appears above the search results.
421    If no result is returned, no title will be displayed. The result should be an html string
422    that can be legally put inside a block tag such as C<h3> or C<p>.
423    
424    =cut
425    
426    sub SearchTitle {
427        # Get the parameters.
428        my ($self) = @_;
429        # Compute the title. We extract the relevant clues from the query parameters.
430        my $cgi = $self->Q();
431        my $type = ($cgi->param('useSims') ? "Similarities" : "Bidirectional Best Hits");
432        my $style = ($cgi->param('exclusion') ? "Discriminating" : "Common");
433        my $retVal = "$style Genes using $type";
434        # Return it.
435        return $retVal;
436    }
437    
438  =head2 Internal Utilities  =head2 Internal Utilities
439    
440  =head3 IsCommon  =head3 IsCommon

Legend:
Removed from v.1.12  
changed lines
  Added in v.1.16

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3