[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.110, Tue Apr 29 20:54:51 2008 UTC revision 1.111, Wed May 7 23:11:51 2008 UTC
# Line 15  Line 15 
15      use CustomAttributes;      use CustomAttributes;
16      use RemoteCustomAttributes;      use RemoteCustomAttributes;
17      use CGI;      use CGI;
18        use WikiTools;
19      use base qw(ERDB);      use base qw(ERDB);
20    
21  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
# Line 147  Line 148 
148          my $user = ($FIG_Config::arch eq 'win' ? 'self' : scalar(getpwent()));          my $user = ($FIG_Config::arch eq 'win' ? 'self' : scalar(getpwent()));
149          $retVal->{_ca} = CustomAttributes->new(user => $user);          $retVal->{_ca} = CustomAttributes->new(user => $user);
150      }      }
151        # Insure we have access to the stem module.
152        WikiUse('Lingua::Stem');
153        $retVal->{stemmer} = Lingua::Stem->new();
154        $retVal->{stemmer}->stem_caching({ -level => 2 });
155      # Return it.      # Return it.
156      return $retVal;      return $retVal;
157  }  }
# Line 678  Line 683 
683  }  }
684    
685    
686    =head3 Stem
687    
688        my $stem = $sprout->Stem($word);
689    
690    Return the stem of the specified word, or C<undef> if the word is not
691    stemmable. Note that even if the word is stemmable, the stem may be
692    the same as the original word.
693    
694    =over 4
695    
696    =item word
697    
698    Word to convert into a stem.
699    
700    =item RETURN
701    
702    Returns a stem of the word (which may be the word itself), or C<undef> if
703    the word is not stemmable.
704    
705    =back
706    
707    =cut
708    
709    sub Stem {
710        # Get the parameters.
711        my ($self, $word) = @_;
712        # Declare the return variable.
713        my $retVal;
714        # See if it's stemmable.
715        if ($word =~ /^[A-Za-z]+$/) {
716            # Compute the stem.
717            my $stemList = $self->{stemmer}->stem($word);
718            my $stem = $stemList->[0];
719            # Check to see if it's long enough.
720            if (length $stem >= 3) {
721                # Yes, keep it.
722                $retVal = $stem;
723            } else {
724                # No, use the original word.
725                $retVal = $word;
726            }
727        }
728        # Return the result.
729        return $retVal;
730    }
731    
732    
733  =head3 Build  =head3 Build
734    
735      $sprout->Build();      $sprout->Build();
# Line 4033  Line 4085 
4085      # Get the parameters.      # Get the parameters.
4086      my ($self, $searchExpression) = @_;      my ($self, $searchExpression) = @_;
4087      # Perform the standard cleanup.      # Perform the standard cleanup.
4088      my $retVal = $self->ERDB::CleanKeywords($searchExpression);      my $words = $self->ERDB::CleanKeywords($searchExpression);
4089      # Fix the periods in EC and TC numbers.      # Fix the periods in EC and TC numbers.
4090      $retVal =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;      $words =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;
4091      # Fix non-trailing periods.      # Fix non-trailing periods.
4092      $retVal =~ s/\.(\w)/_$1/g;      $words =~ s/\.(\w)/_$1/g;
4093      # Fix non-leading minus signs.      # Fix non-leading minus signs.
4094      $retVal =~ s/(\w)[\-]/$1_/g;      $words =~ s/(\w)[\-]/$1_/g;
4095      # Fix the vertical bars and colons      # Fix the vertical bars and colons
4096      $retVal =~ s/(\w)[|:](\w)/$1'$2/g;      $words =~ s/(\w)[|:](\w)/$1'$2/g;
4097        # Now split up the list so that each keyword is in its own string. We keep the delimiters
4098        # because they may contain boolean expression data.
4099        my @words = split /([^A-Za-z'0-9_]+)/, $words;
4100        # We'll convert the stemmable words into stems and re-assemble the result.
4101        my $retVal = "";
4102        for my $word (@words) {
4103            my $stem = $self->Stem($word);
4104            if (defined $stem) {
4105                $retVal .= $stem;
4106            } else {
4107                $retVal .= $word;
4108            }
4109        }
4110        Trace("Cleaned keyword list for \"$searchExpression\" is \"$retVal\".") if T(3);
4111      # Return the result.      # Return the result.
4112      return $retVal;      return $retVal;
4113  }  }

Legend:
Removed from v.1.110  
changed lines
  Added in v.1.111

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3