[Bio] / Sprout / SHSigGenes.pm Repository:
ViewVC logotype

Diff of /Sprout/SHSigGenes.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.11, Wed Feb 21 13:20:37 2007 UTC revision 1.13, Tue Apr 10 15:34:07 2007 UTC
# Line 95  Line 95 
95      my $cutoff = $cgi->param('cutoff') || "1e-10";      my $cutoff = $cgi->param('cutoff') || "1e-10";
96      my $statistical = $cgi->param('statistical') || 1;      my $statistical = $cgi->param('statistical') || 1;
97      my $showMatch = $cgi->param('showMatch') || 0;      my $showMatch = $cgi->param('showMatch') || 0;
98        my $useSims = $cgi->param('useSims') || 0;
99        my $pegsOnly = $cgi->param('pegsOnly') || 1;
100      # Now we build the table rows.      # Now we build the table rows.
101      my @rows = ();      my @rows = ();
102      # First we have the given genome.      # First we have the given genome.
# Line 105  Line 107 
107                           $cgi->td({colspan => 2}, $targetMenu));                           $cgi->td({colspan => 2}, $targetMenu));
108      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),      push @rows, $cgi->Tr($cgi->td({valign => "top"}, "Exclusion Genomes (Set 2)"),
109                           $cgi->td({colspan => 2}, $excludeMenu));                           $cgi->td({colspan => 2}, $excludeMenu));
110      # Next, the numeric parameters.      # Next, the tuning parameters.
111      push @rows, $cgi->Tr($cgi->td("Commonality"),      push @rows, $cgi->Tr($cgi->td("Commonality"),
112                           $cgi->td($cgi->textfield(-name => 'commonality',                           $cgi->td($cgi->textfield(-name => 'commonality',
113                                                    -value => $commonality,                                                    -value => $commonality,
# Line 114  Line 116 
116                                    $cgi->checkbox(-name => 'statistical',                                    $cgi->checkbox(-name => 'statistical',
117                                                   -checked => $statistical,                                                   -checked => $statistical,
118                                                   -value => 1,                                                   -value => 1,
119                                                   -label => 'Use Statistical Algorithm')),                                                   -label => 'Use Statistical Algorithm'),
120                                      $cgi->checkbox(-name => 'useSims',
121                                                     -checked => $useSims,
122                                                     -value => 1,
123                                                     -label => 'Use Similarities')))),
124                    $cgi->Tr($cgi->td(), $cgi->td(join(" ",
125                                    $cgi->checkbox(-name => 'showMatch',                                    $cgi->checkbox(-name => 'showMatch',
126                                                   -checked => $showMatch,                                                   -checked => $showMatch,
127                                                   -value => 1,                                                   -value => 1,
128                                                   -label => 'Show Matching Genes')));                                                   -label => 'Show Matching Genes'),
129      push @rows, $cgi->Tr($cgi->td("Cutoff"),  #                                  $cgi->checkbox(-name => 'pegsOnly',
130    #                                                 -checked => $pegsOnly,
131    #                                                 -value => 1,
132    #                                                 -label => 'PEGs Only')
133                                      ))),
134                    $cgi->Tr($cgi->td("Cutoff"),
135                           $cgi->td($cgi->textfield(-name => 'cutoff',                           $cgi->td($cgi->textfield(-name => 'cutoff',
136                                                    -value => $cutoff,                                                    -value => $cutoff,
137                                                    -size => 5)));                                                    -size => 5)));
# Line 162  Line 174 
174      # Validate the numeric parameters.      # Validate the numeric parameters.
175      my $commonality = $cgi->param('commonality');      my $commonality = $cgi->param('commonality');
176      my $cutoff = $cgi->param('cutoff');      my $cutoff = $cgi->param('cutoff');
177        my $pegsOnly = $cgi->param('pegsOnly') || 1;
178      if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {      if ($commonality !~ /^\s*\d(\.\d+)?\s*$/) {
179          $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");          $self->SetMessage("Commonality value appears invalid, too big, negative, or not a number.");
180      } elsif ($commonality <= 0 || $commonality > 1) {      } elsif ($commonality <= 0 || $commonality > 1) {
# Line 172  Line 185 
185          $self->SetMessage("Cutoff cannot be greater than 1.");          $self->SetMessage("Cutoff cannot be greater than 1.");
186      } else {      } else {
187          # Now we need to gather and validate the genome sets.          # Now we need to gather and validate the genome sets.
188            $self->PrintLine("Gathering the target genomes.  ");
189          my ($givenGenomeID) = $self->GetGenomes('given');          my ($givenGenomeID) = $self->GetGenomes('given');
190          my %targetGenomes = map { $_ => 1 } $self->GetGenomes('target');          my %targetGenomes = map { $_ => 1 } $self->GetGenomes('target');
191            $self->PrintLine("Gathering the exclusion genomes.  ");
192          my %exclusionGenomes = map { $_ => 1 } $self->GetGenomes('exclusion');          my %exclusionGenomes = map { $_ => 1 } $self->GetGenomes('exclusion');
193            $self->PrintLine("Validating the genome sets.<br />");
194          # Insure the given genome is not in the exclusion set.          # Insure the given genome is not in the exclusion set.
195          if ($exclusionGenomes{$givenGenomeID}) {          if ($exclusionGenomes{$givenGenomeID}) {
196              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");              $self->SetMessage("The given genome ($givenGenomeID) cannot be in the exclusion set.");
# Line 190  Line 206 
206          $retVal = 0;          $retVal = 0;
207          # Compute the list of genomes of interest.          # Compute the list of genomes of interest.
208          my @allGenomes = (keys %exclusionGenomes, keys %targetGenomes);          my @allGenomes = (keys %exclusionGenomes, keys %targetGenomes);
209          # Get the BBH matrix.          # Set the parameter that indicates whether or not we're in PEGs-only mode.
210            my $pegMode = ($pegsOnly ? 'peg' : undef);
211            # Get the peg matrix.
212            Trace("Requesting matrix.") if T(3);
213          $saveTime = time();          $saveTime = time();
214          my %bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);          my %bbhMatrix;
215            if (! $cgi->param('useSims')) {
216                # Here we are using BBHs, which are fast enough to do in one gulp.
217                $self->PrintLine("Requesting bidirectional best hits.  ");
218                %bbhMatrix = $sprout->BBHMatrix($givenGenomeID, $cutoff, @allGenomes);
219            } else {
220                # Here we are using similarities, which is much more complicated.
221                $self->PrintLine("Requesting similarities.<br />");
222                # Create a filtering matrix for the results. We only want to keep PEGs in the
223                # specified target and exclusion genomes.
224                my %keepGenomes = map { $_ => 1 } @allGenomes;
225                # Loop through the given genome's features.
226                my @features = $sprout->FeaturesOf($givenGenomeID, $pegMode);
227                for my $fid (@features) {
228                    $self->PrintLine("Retrieving similarities for $fid.  ");
229                    # Get this feature's similarities.
230                    my $simList = $sprout->Sims($fid, 1000, $cutoff, 'fig');
231                    my $simCount = scalar @{$simList};
232                    $self->PrintLine("Raw similarity count: $simCount.  ");
233                    # Create the matrix hash for this feature.
234                    $bbhMatrix{$fid} = {};
235                    # Now we need to filter out the similarities that don't land on the target genome.
236                    $simCount = 0;
237                    for my $sim (@{$simList}) {
238                        # Insure this similarity lands on a target genome.
239                        my ($genomeID2) = FIGRules::ParseFeatureID($sim->id2);
240                        if ($keepGenomes{$genomeID2}) {
241                            # Here we're keeping the similarity, so we put it in this feature's hash.
242                            $bbhMatrix{$fid}->{$sim->id2} = $sim->psc;
243                            $simCount++;
244                        }
245                    }
246                    $self->PrintLine("Similarities retained: $simCount.<br />");
247                }
248            }
249          $bbhTimer += time() - $saveTime;          $bbhTimer += time() - $saveTime;
250            $self->PrintLine("Time to build matrix: $bbhTimer seconds.<br />");
251            Trace("Matrix built.") if T(3);
252          # Create a feature query object to loop through the chosen features of the given          # Create a feature query object to loop through the chosen features of the given
253          # genome.          # genome.
254          Trace("Creating feature query.") if T(3);          Trace("Creating feature query.") if T(3);
# Line 215  Line 270 
270              } else {              } else {
271                  # Get the feature's ID.                  # Get the feature's ID.
272                  my $fid = $fquery->FID();                  my $fid = $fquery->FID();
273                  Trace("Processing feature $fid.") if T(4);                  # Insure we want to look at this feature.
274                    if ($fid =~ /\.peg\./ || ! $pegsOnly) {
275                        Trace("Checking feature $fid.") if T(4);
276                        $self->PrintLine("Checking feature $fid.<br />");
277                  # Get its list of BBHs. The list is actually a hash mapping each BBH to its                  # Get its list of BBHs. The list is actually a hash mapping each BBH to its
278                  # score. All we care about, however, are the BBHs themselves.                  # score. All we care about, however, are the BBHs themselves.
279                  my $bbhList = $bbhMatrix{$fid};                  my $bbhList = $bbhMatrix{$fid};
# Line 232  Line 290 
290                  # Denote that we're in our own genome.                  # Denote that we're in our own genome.
291                  $alreadySeen{$givenGenomeID} = 0;                  $alreadySeen{$givenGenomeID} = 0;
292                  my $targetCount = 1;                  my $targetCount = 1;
293                  # Loop through the BBHs.                      # Loop through the BBHs/Sims.
294                  for my $bbhPeg (keys %{$bbhList}) {                  for my $bbhPeg (keys %{$bbhList}) {
295                      # Get the genome ID. We want to find out if this genome is new.                      # Get the genome ID. We want to find out if this genome is new.
296                      my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg);                      my ($genomeID) = FIGRules::ParseFeatureID($bbhPeg);
# Line 272  Line 330 
330                      # If the two differentials are greater than one, we keep this feature.                      # If the two differentials are greater than one, we keep this feature.
331                      $score = $inD + $outD;                      $score = $inD + $outD;
332                      $okFlag = ($score > 1);                      $okFlag = ($score > 1);
333                            # Subtract 1 from the score so it looks like the commonality score.
334                            $score -= 1.0;
335                  } else {                  } else {
336                      # Check to see if we're common in set 1 and not in set 2.                      # Check to see if we're common in set 1 and not in set 2.
337                      my $score1 = IsCommon($targetCount, $targetSetSize, $commonality);                      my $score1 = IsCommon($targetCount, $targetSetSize, $commonality);
338                      my $score2 = IsCommon($exclusionCount, $exclusionSetSize, $commonality);                      my $score2 = IsCommon($exclusionCount, $exclusionSetSize, $commonality);
339                      if ($score1 && ! $score2) {                      if ($score1 && ! $score2) {
340                          # We satisfy the criterion, so we put this feature to the output. The                          # We satisfy the criterion, so we put this feature to the output. The
341                          # score is normalize to a range similar to the scores in the statistical                              # score is essentially $score1, since $score2 is zero.
342                          # method.                              $score = $score1;
                         $score = $score1 + (1 - $score2);  
343                          $okFlag = 1;                          $okFlag = 1;
344                      }                      }
345                  }                  }
# Line 310  Line 369 
369                  }                  }
370              }              }
371          }          }
372            }
373          # Close the session file.          # Close the session file.
374          $saveTime = time();          $saveTime = time();
375          $self->CloseSession();          $self->CloseSession();

Legend:
Removed from v.1.11  
changed lines
  Added in v.1.13

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3