[Bio] / Sprout / SHSigGenes.pm Repository:
ViewVC logotype

Diff of /Sprout/SHSigGenes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.11, Wed Feb 21 13:20:37 2007 UTC revision 1.12, Tue Apr 10 06:10:02 2007 UTC
# Line 95  Line 95 
95      my $cutoff = $cgi->param('cutoff') || "1e-10";      my $cutoff = $cgi->param('cutoff') || "1e-10";
96      my $statistical = $cgi->param('statistical') || 1;      my $statistical = $cgi->param('statistical') || 1;
97      my $showMatch = $cgi->param('showMatch') || 0;      my $showMatch = $cgi->param('showMatch') || 0;
98        my $useSims = $cgi->param('useSims') || 0;
99        my $pegsOnly = $cgi->param('pegsOnly') || 0;
100      # Now we build the table rows.      # Now we build the table rows.
101      my @rows = ();      my @rows = ();
102      # First we have the given genome.      # First we have the given genome.
# Line 105  Line 107 
107                           $cgi->td({colspan => 2}, $targetMenu));                           $cgi->td({colspan => 2}, $targetMenu));
108      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),
109                           $cgi->td({colspan => 2}, $excludeMenu));                           $cgi->td({colspan => 2}, $excludeMenu));
110      # Next, the numeric parameters.      # Next, the tuning parameters.
111      push @rows, $cgi->Tr($cgi->td("Commonality"),      push @rows, $cgi->Tr($cgi->td("Commonality"),
112                           $cgi->td($cgi->textfield(-name => 'commonality',                           $cgi->td($cgi->textfield(-name => 'commonality',
113                                                    -value => $commonality,                                                    -value => $commonality,
# Line 114  Line 116 
116                                    $cgi->checkbox(-name => 'statistical',                                    $cgi->checkbox(-name => 'statistical',
117                                                   -checked => $statistical,                                                   -checked => $statistical,
118                                                   -value => 1,                                                   -value => 1,
119                                                   -label => 'Use Statistical Algorithm')),                                                   -label => 'Use Statistical Algorithm'),
120                                      $cgi->checkbox(-name => 'useSims',
121                                                     -checked => $useSims,
122                                                     -value => 1,
123                                                     -label => 'Use Similarities')))),
124                    $cgi->Tr($cgi->td(), $cgi->td(join(" ",
125                                    $cgi->checkbox(-name => 'showMatch',                                    $cgi->checkbox(-name => 'showMatch',
126                                                   -checked => $showMatch,                                                   -checked => $showMatch,
127                                                   -value => 1,                                                   -value => 1,
128                                                   -label => 'Show Matching Genes')));                                                   -label => 'Show Matching Genes'),
129      push @rows, $cgi->Tr($cgi->td("Cutoff"),                                    $cgi->checkbox(-name => 'pegsOnly',
130                                                     -checked => $pegsOnly,
131                                                     -value => 1,
132                                                     -label => 'PEGs Only')))),
133                    $cgi->Tr($cgi->td("Cutoff"),
134                           $cgi->td($cgi->textfield(-name => 'cutoff',                           $cgi->td($cgi->textfield(-name => 'cutoff',
135                                                    -value => $cutoff,                                                    -value => $cutoff,
136                                                    -size => 5)));                                                    -size => 5)));
# Line 162  Line 173 
173      # Validate the numeric parameters.      # Validate the numeric parameters.
174      my $commonality = $cgi->param('commonality');      my $commonality = $cgi->param('commonality');
175      my $cutoff = $cgi->param('cutoff');      my $cutoff = $cgi->param('cutoff');
176        my $pegsOnly = $cgi->param('pegsOnly');
177      if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {      if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {
178          $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");          $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");
179      } elsif ($commonality <= 0 || $commonality > 1) {      } elsif ($commonality <= 0 || $commonality > 1) {
# Line 172  Line 184 
184          $self->SetMessage("Cutoff cannot be greater than 1.");          $self->SetMessage("Cutoff cannot be greater than 1.");
185      } else {      } else {
186          # Now we need to gather and validate the genome sets.          # Now we need to gather and validate the genome sets.
187            $self->PrintLine("Gathering the target genomes.  ");
188          my ($givenGenomeID) = $self->GetGenomes('given');          my ($givenGenomeID) = $self->GetGenomes('given');
189          my %targetGenomes = map { $_ => 1 } $self->GetGenomes('target');          my %targetGenomes = map { $_ => 1 } $self->GetGenomes('target');
190            $self->PrintLine("Gathering the exclusion genomes.  ");
191          my %exclusionGenomes = map { $_ => 1 } $self->GetGenomes('exclusion');          my %exclusionGenomes = map { $_ => 1 } $self->GetGenomes('exclusion');
192            $self->PrintLine("Validating the genome sets.<br />");
193          # Insure the given genome is not in the exclusion set.          # Insure the given genome is not in the exclusion set.
194          if ($exclusionGenomes{$givenGenomeID}) {          if ($exclusionGenomes{$givenGenomeID}) {
195              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");
# Line 190  Line 205 
205          $retVal = 0;          $retVal = 0;
206          # Compute the list of genomes of interest.          # Compute the list of genomes of interest.
207          my @allGenomes = (keys %exclusionGenomes, keys %targetGenomes);          my @allGenomes = (keys %exclusionGenomes, keys %targetGenomes);
208          # Get the BBH matrix.          # Set the parameter that indicates whether or not we're in PEGs-only mode.
209            my $pegMode = ($pegsOnly ? 'peg' : undef);
210            # Get the peg matrix.
211            Trace("Requesting matrix.") if T(3);
212          $saveTime = time();          $saveTime = time();
213          my %bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);          my %bbhMatrix;
214            if (! $cgi->param('useSims')) {
215                # Here we are using BBHs, which are fast enough to do in one gulp.
216                $self->PrintLine("Requesting bidirectional best hits.  ");
217                %bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);
218            } else {
219                # Here we are using similarities, which is much more complicated.
220                $self->PrintLine("Requesting similarities.<br />");
221                # Create a filtering matrix for the results. We only want to keep PEGs in the
222                # specified target and exclusion genomes.
223                my %keepGenomes = map { $_ => 1 } @allGenomes;
224                # Loop through the given genome's features.
225                my @features = $sprout->FeaturesOf($givenGenomeID, $pegMode);
226                for my $fid (@features) {
227                    $self->PrintLine("Retrieving similarities for $fid.  ");
228                    # Get this feature's similarities.
229                    my $simList = $sprout->Sims($fid, 1000, $cutoff, 'fig');
230                    my $simCount = scalar @{$simList};
231                    $self->PrintLine("Raw similarity count: $simCount.  ");
232                    # Create the matrix hash for this feature.
233                    $bbhMatrix{$fid} = {};
234                    # Now we need to filter out the similarities that don't land on the target genome.
235                    $simCount = 0;
236                    for my $sim (@{$simList}) {
237                        # Insure this similarity lands on a target genome.
238                        my ($genomeID2) = FIGRules::ParseFeatureID($sim->id2);
239                        if ($keepGenomes{$genomeID2}) {
240                            # Here we're keeping the similarity, so we put it in this feature's hash.
241                            $bbhMatrix{$fid}->{$sim->id2} = $sim->psc;
242                            $simCount++;
243                        }
244                    }
245                    $self->PrintLine("Similarities retained: $simCount.<br />");
246                }
247            }
248          $bbhTimer += time() - $saveTime;          $bbhTimer += time() - $saveTime;
249            $self->PrintLine("Time to build matrix: $bbhTimer seconds.<br />");
250            Trace("Matrix built.") if T(3);
251          # Create a feature query object to loop through the chosen features of the given          # Create a feature query object to loop through the chosen features of the given
252          # genome.          # genome.
253          Trace("Creating feature query.") if T(3);          Trace("Creating feature query.") if T(3);
# Line 215  Line 269 
269              } else {              } else {
270                  # Get the feature's ID.                  # Get the feature's ID.
271                  my $fid = $fquery->FID();                  my $fid = $fquery->FID();
272                  Trace("Processing feature $fid.") if T(4);                  # Insure we want to look at this feature.
273                    if ($fid =~ /\.peg\./ || ! $pegsOnly) {
274                        Trace("Checking feature $fid.") if T(4);
275                        $self->PrintLine("Checking feature $fid.<br />");
276                  # Get its list of BBHs. The list is actually a hash mapping each BBH to its                  # Get its list of BBHs. The list is actually a hash mapping each BBH to its
277                  # score. All we care about, however, are the BBHs themselves.                  # score. All we care about, however, are the BBHs themselves.
278                  my $bbhList = $bbhMatrix{$fid};                  my $bbhList = $bbhMatrix{$fid};
# Line 232  Line 289 
289                  # Denote that we're in our own genome.                  # Denote that we're in our own genome.
290                  $alreadySeen{$givenGenomeID} = 0;                  $alreadySeen{$givenGenomeID} = 0;
291                  my $targetCount = 1;                  my $targetCount = 1;
292                  # Loop through the BBHs.                      # Loop through the BBHs/Sims.
293                  for my $bbhPeg (keys %{$bbhList}) {                  for my $bbhPeg (keys %{$bbhList}) {
294                      # Get the genome ID. We want to find out if this genome is new.                      # Get the genome ID. We want to find out if this genome is new.
295                      my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg);                      my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg);
# Line 272  Line 329 
329                      # If the two differentials are greater than one, we keep this feature.                      # If the two differentials are greater than one, we keep this feature.
330                      $score = $inD + $outD;                      $score = $inD + $outD;
331                      $okFlag = ($score > 1);                      $okFlag = ($score > 1);
332                            # Subtract 1 from the score so it looks like the commonality score.
333                            $score -= 1.0;
334                  } else {                  } else {
335                      # Check to see if we're common in set 1 and not in set 2.                      # Check to see if we're common in set 1 and not in set 2.
336                      my $score1 = IsCommon($targetCount, $targetSetSize, $commonality);                      my $score1 = IsCommon($targetCount, $targetSetSize, $commonality);
337                      my $score2 = IsCommon($exclusionCount, $exclusionSetSize, $commonality);                      my $score2 = IsCommon($exclusionCount, $exclusionSetSize, $commonality);
338                      if ($score1 && ! $score2) {                      if ($score1 && ! $score2) {
339                          # We satisfy the criterion, so we put this feature to the output. The                          # We satisfy the criterion, so we put this feature to the output. The
340                          # score is normalize to a range similar to the scores in the statistical                              # score is essentially $score1, since $score2 is zero.
341                          # method.                              $score = $score1;
                         $score = $score1 + (1 - $score2);  
342                          $okFlag = 1;                          $okFlag = 1;
343                      }                      }
344                  }                  }
# Line 310  Line 368 
368                  }                  }
369              }              }
370          }          }
371            }
372          # Close the session file.          # Close the session file.
373          $saveTime = time();          $saveTime = time();
374          $self->CloseSession();          $self->CloseSession();

Legend:
Removed from v.1.11  
changed lines
  Added in v.1.12

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3