[Bio] / Sprout / SHSigGenes.pm Repository:
ViewVC logotype

Diff of /Sprout/SHSigGenes.pm

Parent Directory | Revision Log | View Patch

revision 1.17, Mon Aug 20 23:25:32 2007 UTC | revision 1.22, Mon Mar 16 00:24:23 2009 UTC
# Line 4  Line 4 
4    
5      use strict;      use strict;
6      use Tracer;      use Tracer;
7      use CGI;      use CGI qw(-nosticky);
8      use HTML;      use HTML;
9      use Sprout;      use Sprout;
10      use Time::HiRes;      use Time::HiRes;
# Line 67  Line 67 
67    
68  =head3 Form  =head3 Form
69    
70  C<< my $html = $shelp->Form(); >>      my $html = $shelp->Form();
71    
72  Generate the HTML for a form to request a new search.  Generate the HTML for a form to request a new search.
73    
# Line 99  Line 99 
99      # Now we build the table rows.      # Now we build the table rows.
100      my @rows = ();      my @rows = ();
101      # First we have the given genome.      # First we have the given genome.
102      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Reference Genome"),      push @rows, CGI::Tr(CGI::td({valign => "top"}, "Reference Genome"),
103                           $cgi->td({colspan => 2}, $givenMenu));                           CGI::td({colspan => 2}, $givenMenu));
104      # Now show the target and exclusion menus.      # Now show the target and exclusion menus.
105      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Inclusion Genomes (Set 1)"),      push @rows, CGI::Tr(CGI::td({valign => "top"}, "Inclusion Genomes (Set 1)"),
106                           $cgi->td({colspan => 2}, $targetMenu));                           CGI::td({colspan => 2}, $targetMenu));
107      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),      push @rows, CGI::Tr(CGI::td({valign => "top"}, "Exclusion Genomes (Set 2)"),
108                           $cgi->td({colspan => 2}, $excludeMenu));                           CGI::td({colspan => 2}, $excludeMenu));
109      # Next, the tuning parameters.      # Next, the tuning parameters.
110      push @rows, $cgi->Tr($cgi->td("Commonality"),      push @rows, CGI::Tr(CGI::td("Commonality"),
111                           $cgi->td($cgi->textfield(-name => 'commonality',                           CGI::td(CGI::textfield(-name => 'commonality',
112                                                    -value => $commonality,                                                    -value => $commonality,
113                                                    -size => 5))),                                                    -size => 5))),
114                  $cgi->Tr($cgi->td(), $cgi->td(join(" ",                  CGI::Tr(CGI::td(), CGI::td(join(" ",
115                                    $cgi->checkbox(-name => 'statistical',                                    CGI::checkbox(-name => 'statistical',
116                                                   -checked => $statistical,                                                   -checked => $statistical,
117                                                   -value => 1,                                                   -value => 1,
118                                                   -label => 'Use Statistical Algorithm') .                                                   -label => 'Use Statistical Algorithm') .
119                                    SearchHelper::Hint("SigGenes",                                    SearchHelper::Hint("SigGenes", 24),
120                                                       "When two sets of genomees are specified, check this " .                                    CGI::checkbox(-name => 'useSims',
                                                      "box to use a statistical algorithm designed " .  
                                                      "specifically to choose differentiating genes. " .  
                                                      "This box has no effect when looking for genes " .  
                                                      "in common."),  
                                   $cgi->checkbox(-name => 'useSims',  
121                                                   -checked => $useSims,                                                   -checked => $useSims,
122                                                   -value => 1,                                                   -value => 1,
123                                                   -label => 'Use Similarities') .                                                   -label => 'Use Similarities') .
124                                    SearchHelper::Hint("SigGenes",                                    SearchHelper::Hint("SigGenes", 25)))),
125                                                       "Normally, Bidirectional Best Hits are used to " .                  CGI::Tr(CGI::td(), CGI::td(join(" ",
126                                                       "find matching genes. Check this box to use " .                                    CGI::checkbox(-name => 'showMatch',
                                                      "similarities instead.")))),  
                 $cgi->Tr($cgi->td(), $cgi->td(join(" ",  
                                   $cgi->checkbox(-name => 'showMatch',  
127                                                   -checked => $showMatch,                                                   -checked => $showMatch,
128                                                   -value => 1,                                                   -value => 1,
129                                                   -label => 'Show Matching Genes') .                                                   -label => 'Show Matching Genes') .
130                                    SearchHelper::Hint("SigGenes",                                    SearchHelper::Hint("SigGenes", 26)))),
131                                                       "Check this button to display the genes matching " .                  CGI::Tr(CGI::td("Cutoff"),
132                                                       "each gene displayed in the results.")))),                           CGI::td(CGI::textfield(-name => 'cutoff',
                 $cgi->Tr($cgi->td("Cutoff"),  
                          $cgi->td($cgi->textfield(-name => 'cutoff',  
133                                                    -value => $cutoff,                                                    -value => $cutoff,
134                                                    -size => 5)));                                                    -size => 5)));
135      # Next, the feature filter rows.      # Next, the feature filter rows.
# Line 157  Line 147 
147    
148  =head3 Find  =head3 Find
149    
150  C<< my $resultCount = $shelp->Find(); >>      my $resultCount = $shelp->Find();
151    
152  Conduct a search based on the current CGI query parameters. The search results will  Conduct a search based on the current CGI query parameters. The search results will
153  be written to the session cache file and the number of results will be  be written to the session cache file and the number of results will be
# Line 203  Line 193 
193          }          }
194          # Only proceed if the filtering parameters are valid.          # Only proceed if the filtering parameters are valid.
195          if ($rhelp->Valid()) {          if ($rhelp->Valid()) {
             # Start the output session.  
             $self->OpenSession($rhelp);  
196              # Now we need to gather and validate the genome sets.              # Now we need to gather and validate the genome sets.
197              $self->PrintLine("Gathering the target genomes.  ");              $self->PrintLine("Gathering the target genomes.  ");
198              my ($givenGenomeID) = $self->GetGenomes('given');              my ($givenGenomeID) = $self->GetGenomes('given');
199                Trace("Given genome is $givenGenomeID.") if T(3);
200              my %targetGenomes = map { $_ => 1 } $self->GetGenomes('target');              my %targetGenomes = map { $_ => 1 } $self->GetGenomes('target');
201                Trace("Target genomes are " . join(", ", sort keys %targetGenomes) . ".") if T(3);
202              $self->PrintLine("Gathering the exclusion genomes.  ");              $self->PrintLine("Gathering the exclusion genomes.  ");
203              my %exclusionGenomes = map { $_ => 1 } $self->GetGenomes('exclusion');              my %exclusionGenomes = map { $_ => 1 } $self->GetGenomes('exclusion');
204                Trace("Exclusion genomes are " . join(", ", sort keys %exclusionGenomes) . ".") if T(3);
205              $self->PrintLine("Validating the genome sets.<br />");              $self->PrintLine("Validating the genome sets.<br />");
206              # Insure the given genome is not in the exclusion set.              # Insure the given genome is not in the exclusion set.
207              if ($exclusionGenomes{$givenGenomeID}) {              if ($exclusionGenomes{$givenGenomeID}) {
208                  $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");                  $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");
209              } else {              } else {
210                    # Start the output session.
211                    $self->OpenSession($rhelp);
212                  # Insure the given genome is in the target set.                  # Insure the given genome is in the target set.
213                  $targetGenomes{$givenGenomeID} = 1;                  $targetGenomes{$givenGenomeID} = 1;
214              }                  Trace("$givenGenomeID added to target set.") if T(3);
215              # Find out if we want to use a statistical analysis.              # Find out if we want to use a statistical analysis.
216              my $statistical = $cgi->param('statistical') || 1;              my $statistical = $cgi->param('statistical') || 1;
217              # Denote we have not yet found any genomes.              # Denote we have not yet found any genomes.
# Line 228  Line 221 
221              # Get the peg matrix.              # Get the peg matrix.
222              Trace("Requesting matrix.") if T(3);              Trace("Requesting matrix.") if T(3);
223              $saveTime = time();              $saveTime = time();
224              my %bbhMatrix;                  my $bbhMatrix;
225              if (! $cgi->param('useSims')) {              if (! $cgi->param('useSims')) {
226                  # Here we are using BBHs, which are fast enough to do in one gulp.                  # Here we are using BBHs, which are fast enough to do in one gulp.
227                  $self->PrintLine("Requesting bidirectional best hits.  ");                  $self->PrintLine("Requesting bidirectional best hits.  ");
228                  %bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);                      $bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);
229              } else {              } else {
230                  # Here we are using similarities, which are much more complicated.                  # Here we are using similarities, which are much more complicated.
231                  $self->PrintLine("Requesting similarities.<br />");                  $self->PrintLine("Requesting similarities.<br />");
# Line 248  Line 241 
241                      my $simCount = scalar @{$simList};                      my $simCount = scalar @{$simList};
242                      $self->PrintLine("Raw similarity count: $simCount.  ");                      $self->PrintLine("Raw similarity count: $simCount.  ");
243                      # Create the matrix hash for this feature.                      # Create the matrix hash for this feature.
244                      $bbhMatrix{$fid} = {};                          $bbhMatrix->{$fid} = {};
245                      # Now we need to filter out the similarities that don't land on the target genome.                      # Now we need to filter out the similarities that don't land on the target genome.
246                      $simCount = 0;                      $simCount = 0;
247                      for my $sim (@{$simList}) {                      for my $sim (@{$simList}) {
# Line 256  Line 249 
249                          my $genomeID2 = $sprout->GenomeOf($sim->id2);                          my $genomeID2 = $sprout->GenomeOf($sim->id2);
250                          if ($keepGenomes{$genomeID2}) {                          if ($keepGenomes{$genomeID2}) {
251                              # Here we're keeping the similarity, so we put it in this feature's hash.                              # Here we're keeping the similarity, so we put it in this feature's hash.
252                              $bbhMatrix{$fid}->{$sim->id2} = $sim->psc;                                  $bbhMatrix->{$fid}->{$sim->id2} = $sim->psc;
253                              $simCount++;                              $simCount++;
254                          }                          }
255                      }                      }
# Line 291  Line 284 
284                      $self->PrintLine("Checking feature $fid.<br />");                      $self->PrintLine("Checking feature $fid.<br />");
285                      # Get its list of matching genes. The list is actually a hash mapping each matched gene to its                      # Get its list of matching genes. The list is actually a hash mapping each matched gene to its
286                      # score. All we care about, however, are the matches themselves.                      # score. All we care about, however, are the matches themselves.
287                      my $bbhList = $bbhMatrix{$fid};                          my $bbhList = $bbhMatrix->{$fid};
288                      # We next wish to loop through the BBH IDs, counting how many are in each of the                      # We next wish to loop through the BBH IDs, counting how many are in each of the
289                      # sets. If a genome occurs twice, we only want to count the first occurrence, so                      # sets. If a genome occurs twice, we only want to count the first occurrence, so
290                      # we have a hash of genomes we've already seen. The hash will map each gene ID                      # we have a hash of genomes we've already seen. The hash will map each gene ID
# Line 393  Line 386 
386              $putTimer += time() - $saveTime;              $putTimer += time() - $saveTime;
387          }          }
388      }      }
389        }
390      # Trace the timers.      # Trace the timers.
391      Trace("Time spent: Put = $putTimer, Query = $queryTimer, BBH = $bbhTimer.") if T(3);      Trace("Time spent: Put = $putTimer, Query = $queryTimer, BBH = $bbhTimer.") if T(3);
392      # Return the result count.      # Return the result count.
# Line 401  Line 395 
395    
396  =head3 Description  =head3 Description
397    
398  C<< my $htmlText = $shelp->Description(); >>      my $htmlText = $shelp->Description();
399    
400  Return a description of this search. The description is used for the table of contents  Return a description of this search. The description is used for the table of contents
401  on the main search tools page. It may contain HTML, but it should be character-level,  on the main search tools page. It may contain HTML, but it should be character-level,
# Line 418  Line 412 
412    
413  =head3 SearchTitle  =head3 SearchTitle
414    
415  C<< my $titleHtml = $shelp->SearchTitle(); >>      my $titleHtml = $shelp->SearchTitle();
416    
417  Return the display title for this search. The display title appears above the search results.  Return the display title for this search. The display title appears above the search results.
418  If no result is returned, no title will be displayed. The result should be an html string  If no result is returned, no title will be displayed. The result should be an html string
# Line 442  Line 436 
436    
437  =head3 IsCommon  =head3 IsCommon
438    
439  C<< my $score = SHSigGenes::IsCommon($count, $size, $commonality); >>      my $score = SHSigGenes::IsCommon($count, $size, $commonality);
440    
441  Return the match score if a specified count indicates a gene is common in a specified set  Return the match score if a specified count indicates a gene is common in a specified set
442  and 0 otherwise. Commonality is computed by dividing the count by the size of the set and  and 0 otherwise. Commonality is computed by dividing the count by the size of the set and

Legend:
Removed from v.1.17  
changed lines
  Added in v.1.22

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3