[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.110, Tue Apr 29 20:54:51 2008 UTC revision 1.113, Tue Aug 12 06:01:49 2008 UTC
# Line 15  Line 15 
15      use CustomAttributes;      use CustomAttributes;
16      use RemoteCustomAttributes;      use RemoteCustomAttributes;
17      use CGI;      use CGI;
18        use WikiTools;
19      use base qw(ERDB);      use base qw(ERDB);
20    
21  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
# Line 147  Line 148 
148          my $user = ($FIG_Config::arch eq 'win' ? 'self' : scalar(getpwent()));          my $user = ($FIG_Config::arch eq 'win' ? 'self' : scalar(getpwent()));
149          $retVal->{_ca} = CustomAttributes->new(user => $user);          $retVal->{_ca} = CustomAttributes->new(user => $user);
150      }      }
151        # Insure we have access to the stem module.
152        WikiUse('Lingua::Stem');
153        $retVal->{stemmer} = Lingua::Stem->new();
154        $retVal->{stemmer}->stem_caching({ -level => 2 });
155      # Return it.      # Return it.
156      return $retVal;      return $retVal;
157  }  }
# Line 547  Line 552 
552      if (ref $selections ne 'ARRAY') {      if (ref $selections ne 'ARRAY') {
553          $selections = [ split /\s*,\s*/, $selections ];          $selections = [ split /\s*,\s*/, $selections ];
554      }      }
555      my %selected = map { $_ => } @{$selections};      my %selected = map { $_ => 1 } @{$selections};
556      # Extract the filter information. The default is no filtering. It can be passed as a tab-delimited      # Extract the filter information. The default is no filtering. It can be passed as a tab-delimited
557      # string or a list reference.      # string or a list reference.
558      my $filterParms = $options{filter} || "";      my $filterParms = $options{filter} || "";
# Line 678  Line 683 
683  }  }
684    
685    
686    =head3 Stem
687    
688        my $stem = $sprout->Stem($word);
689    
690    Return the stem of the specified word, or C<undef> if the word is not
691    stemmable. Note that even if the word is stemmable, the stem may be
692    the same as the original word.
693    
694    =over 4
695    
696    =item word
697    
698    Word to convert into a stem.
699    
700    =item RETURN
701    
702    Returns a stem of the word (which may be the word itself), or C<undef> if
703    the word is not stemmable.
704    
705    =back
706    
707    =cut
708    
709    sub Stem {
710        # Get the parameters.
711        my ($self, $word) = @_;
712        # Declare the return variable.
713        my $retVal;
714        # See if it's stemmable.
715        if ($word =~ /^[A-Za-z]+$/) {
716            # Compute the stem.
717            my $stemList = $self->{stemmer}->stem($word);
718            my $stem = $stemList->[0];
719            # Check to see if it's long enough.
720            if (length $stem >= 3) {
721                # Yes, keep it.
722                $retVal = $stem;
723            } else {
724                # No, use the original word.
725                $retVal = $word;
726            }
727        }
728        # Return the result.
729        return $retVal;
730    }
731    
732    
733  =head3 Build  =head3 Build
734    
735      $sprout->Build();      $sprout->Build();
# Line 1910  Line 1962 
1962      if ($featureID =~ /^fig\|(\d+\.\d+)/) {      if ($featureID =~ /^fig\|(\d+\.\d+)/) {
1963          $retVal = $1;          $retVal = $1;
1964      } else {      } else {
1965            # Find the feature by alias.
1966            my ($realFeatureID) = $self->FeaturesByAlias($featureID);
1967            if ($realFeatureID && $realFeatureID =~ /^fig\|(\d+\.\d+)/) {
1968                $retVal = $1;
1969            } else {
1970                # Use the external table.
1971                my ($org) = $self->GetFlat(['ExternalAliasOrg'], "ExternalAliasOrg(id) = ?",
1972                                           [$featureID], "ExternalAliasOrg(org)");
1973                if ($org) {
1974                    $retVal = $org;
1975                } else {
1976          Confess("Invalid feature ID $featureID.");          Confess("Invalid feature ID $featureID.");
1977      }      }
1978            }
1979        }
1980      # Return the value found.      # Return the value found.
1981      return $retVal;      return $retVal;
1982  }  }
# Line 4033  Line 4098 
4098      # Get the parameters.      # Get the parameters.
4099      my ($self, $searchExpression) = @_;      my ($self, $searchExpression) = @_;
4100      # Perform the standard cleanup.      # Perform the standard cleanup.
4101      my $retVal = $self->ERDB::CleanKeywords($searchExpression);      my $words = $self->ERDB::CleanKeywords($searchExpression);
4102      # Fix the periods in EC and TC numbers.      # Fix the periods in EC and TC numbers.
4103      $retVal =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;      $words =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;
4104      # Fix non-trailing periods.      # Fix non-trailing periods.
4105      $retVal =~ s/\.(\w)/_$1/g;      $words =~ s/\.(\w)/_$1/g;
4106      # Fix non-leading minus signs.      # Fix non-leading minus signs.
4107      $retVal =~ s/(\w)[\-]/$1_/g;      $words =~ s/(\w)[\-]/$1_/g;
4108      # Fix the vertical bars and colons      # Fix the vertical bars and colons
4109      $retVal =~ s/(\w)[|:](\w)/$1'$2/g;      $words =~ s/(\w)[|:](\w)/$1'$2/g;
4110        # Now split up the list so that each keyword is in its own string. We keep the delimiters
4111        # because they may contain boolean expression data.
4112        my @words = split /([^A-Za-z'0-9_]+)/, $words;
4113        # We'll convert the stemmable words into stems and re-assemble the result.
4114        my $retVal = "";
4115        for my $word (@words) {
4116            my $stem = $self->Stem($word);
4117            if (defined $stem) {
4118                $retVal .= $stem;
4119            } else {
4120                $retVal .= $word;
4121            }
4122        }
4123        Trace("Cleaned keyword list for \"$searchExpression\" is \"$retVal\".") if T(3);
4124      # Return the result.      # Return the result.
4125      return $retVal;      return $retVal;
4126  }  }

Legend:
Removed from v.1.110  
changed lines
  Added in v.1.113

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3