[Bio] / Sprout / SHWordSearch.pm Repository:
ViewVC logotype

Annotation of /Sprout/SHWordSearch.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package SHWordSearch;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use SearchHelper;
8 :     use CGI;
9 :     use HTML;
10 :     use Sprout;
11 :     use FeatureData;
12 :     use FeatureQuery;
13 :    
14 :     our @ISA = qw(SearchHelper);
15 :    
16 :     =head1 Simple Keyword Search Feature Search Helper
17 :    
18 :     =head2 Introduction
19 :    
20 :     This is a basic keyword search engine. Keyword searching is a subset of
21 :     the FidSearch mechanism, which allows filtering by keyword amongst a
22 :     host of other options; however, having a separate search class gives
23 :     new users a way to search without all the extra clutter.
24 :    
25 :     It has the following extra parameters.
26 :    
27 :     =over 4
28 :    
29 :     =item keywords
30 :    
31 :     Search expression. This is essentially a space-delimited list of words with the following
32 :     optional operators applied.
33 :    
34 :     C<+>: A leading plus sign indicates that this word must be present in every row returned.
35 :    
36 :     C<->: A leading minus sign indicates that this word must not be present in any row returned.
37 :     Note that if every search term has a leading minus sign, nothing will match. This is an
38 :     artifact of the search algorithm.
39 :    
40 :     B<(no operator)>: By default (when neither + nor - is specified) the word is optional, but the
41 :     rows that contain it are rated higher.
42 :    
43 :     C<< > < >>: These two operators are used to change a word's contribution to the relevance value
44 :     that is assigned to a row. The C<< > >> operator increases the contribution and the C<< < >>
45 :     operator decreases it.
46 :    
47 :     C<( )>: Parentheses are used to group words into subexpressions. Parenthesized groups can be nested.
48 :    
49 :     C<~>: A leading tilde acts as a negation operator, causing the word's contribution to the row
50 :     relevance to be negative. It's useful for marking noise words. A row that contains such a
51 :     word is rated lower than others, but is not excluded altogether, as it would be with the C<->
52 :     operator.
53 :    
54 :     C<*>: An asterisk is the truncation operator. Unlike the other operators, it should be appended to the word.
55 :    
56 :     C<">: A phrase that is enclosed within double quote characters matches only rows that contain the phrase
57 :     literally, as it was typed.
58 :    
59 :     =item group[]
60 :    
61 :     If specified, these should be the names of NMPDR groups to which the search is to be
62 :     restricted. Otherwise, all groups are searched. This parameter is not on the form; rather,
63 :     it is provided as a quick way to do keyword searches restricted to groups on pages that
64 :     want to provide that capability.
65 :    
66 :     =back
67 :    
68 :     =head2 Virtual Methods
69 :    
70 :     =head3 Form
71 :    
72 :     C<< my $html = $shelp->Form(); >>
73 :    
74 :     Generate the HTML for a form to request a new search.
75 :    
76 :     =cut
77 :    
sub Form {
    # Build the HTML for the keyword-search request form.
    #
    # Parameters:
    #   $self   this search helper. It supplies the query object (Q) and the
    #           form-building helpers FormStart, SubmitRow, MakeTable and
    #           FormEnd that are called below.
    # Returns:
    #   The form HTML: the FormStart text, followed by the table built from
    #   the rows assembled here, followed by the FormEnd text.
    my ($self) = @_;
    # Get the query object. The form needs nothing else from the helper
    # besides the HTML-building methods.
    my $cgi = $self->Q();
    # Start the form.
    my $retVal = $self->FormStart("Keyword Search");
    # The first row holds the search expression, pre-filled with whatever
    # the user typed last time (if anything).
    my $expressionString = $cgi->param('keywords') || "";
    my @rows = (
        $cgi->Tr($cgi->td("Search Words"),
                 $cgi->td({colspan => 2},
                          $cgi->textfield(-name  => 'keywords',
                                          -value => $expressionString,
                                          -size  => 40))),
    );
    # The next row is for the submit button.
    push @rows, $self->SubmitRow();
    # Finally, if groups are specified, carry them along as hidden fields and
    # display an explanation of the restriction.
    my @groups = $cgi->param('group');
    if (@groups) {
        # The group names come straight from the request, so they must be
        # HTML-escaped before being placed in the visible message; otherwise
        # a crafted "group" value could inject markup into the page.
        my @shownNames = map { $cgi->escapeHTML($_) } @groups;
        my $finalName  = pop @shownNames;
        # English wants "A.", "A and B." or "A, B and C.".
        my $message = "Search restricted to "
                    . (@shownNames ? join(", ", @shownNames) . " and $finalName."
                                   : "$finalName.");
        # One hidden field per group. Assuming Q() returns a CGI.pm query
        # object, hidden() is "sticky": without -override it ignores the value
        # given here and re-emits every value already present in the request,
        # which would multiply the group list on each round trip.
        my @hiddens = map { $cgi->hidden(-name     => 'group',
                                         -value    => $_,
                                         -override => 1) } @groups;
        push @rows, $cgi->Tr($cgi->td(@hiddens), $cgi->td($message));
    }
    # Create the table and close the form.
    $retVal .= $self->MakeTable(\@rows);
    $retVal .= $self->FormEnd();
    # Return the result.
    return $retVal;
}
121 :    
122 :     =head3 Find
123 :    
124 :     C<< my $resultCount = $shelp->Find(); >>
125 :    
126 :     Conduct a search based on the current CGI query parameters. The search results will
127 :     be written to the session cache file and the number of results will be
128 :     returned. If the search parameters are invalid, a result count of C<undef> will be
129 :     returned and a result message will be stored in this object describing the problem.
130 :    
131 :     =cut
132 :    
sub Find {
    # Run the keyword search described by the current query parameters.
    #
    # Parameters:
    #   $self   this search helper; supplies the query object (Q), the
    #           database object (DB), and the session/message methods.
    # Returns:
    #   The total of the counts returned by ProcessQuery, or undef (with a
    #   message stored through SetMessage) when the keyword list is unusable.
    my ($self) = @_;
    my $cgi    = $self->Q();
    my $sprout = $self->DB();
    # The result stays undefined until the keywords have been validated, so
    # the caller can tell an error from an empty result.
    my $total;
    # Split the expression into words and count the ones that do not start
    # with a minus sign; at least one such word is required.
    my $keywords      = $cgi->param('keywords') || "";
    my @terms         = split /\s+/, $keywords;
    my $positiveCount = grep { /^[^\-]/ } @terms;
    unless (@terms) {
        $self->SetMessage("No search words specified.");
        return $total;
    }
    unless ($positiveCount) {
        $self->SetMessage("At least one keyword must be positive. All the keywords entered are preceded by minus signs.");
        return $total;
    }
    # The keywords are acceptable: open the session file and start counting.
    $self->OpenSession();
    $total = 0;
    # Work out the trailing Search arguments for each query to run: one
    # query per requested group, or a single features-only query otherwise.
    my @groupNames = $cgi->param('group');
    my @searchSpecs;
    if (@groupNames) {
        for my $groupName (@groupNames) {
            push @searchSpecs, [['Feature', 'HasFeature', 'Genome'],
                                "Genome(primary-group) = ?", [$groupName]];
        }
    } else {
        push @searchSpecs, [['Feature']];
    }
    # Run the queries in order, sending each one's features to the output.
    for my $spec (@searchSpecs) {
        my $query = $sprout->Search($keywords, 0, @{$spec});
        $total += $self->ProcessQuery($query);
    }
    # Close the session file and report the count.
    $self->CloseSession();
    return $total;
}
175 :    
176 :     =head3 Description
177 :    
178 :     C<< my $htmlText = $shelp->Description(); >>
179 :    
180 :     Return a description of this search. The description is used for the table of contents
181 :     on the main search tools page. It may contain HTML, but it should be character-level,
182 :     not block-level, since the description is going to appear in a list.
183 :    
184 :     =cut
185 :    
sub Description {
    # Supply the one-line description of this search.
    #
    # Parameters:
    #   $self   this search helper; accepted but not consulted.
    # Returns:
    #   A fixed descriptive string.
    my $self = shift;
    my $text = "Search for features based on keywords.";
    return $text;
}
192 :    
193 :     =head3 ProcessQuery
194 :    
195 :     C<< my $count = $shelp->ProcessQuery($query); >>
196 :    
197 :     Run through the results of a query, sending all the features retrieved to the output
198 :     cache. The number of features found will be returned to the caller.
199 :    
200 :     =over 4
201 :    
202 :     =item query
203 :    
204 :     A B<DBQuery> object that returns features.
205 :    
206 :     =item RETURN
207 :    
208 :     Returns the number of features found.
209 :    
210 :     =back
211 :    
212 :     =cut
213 :    
sub ProcessQuery {
    # Drain a query, passing every record it yields to the output.
    #
    # Parameters:
    #   $self    this search helper; receives each result through PutFeature.
    #   $query   an object whose Fetch method returns the next record, or a
    #            false value once the results are exhausted.
    # Returns:
    #   The number of records fetched and sent to the output.
    my ($self, $query) = @_;
    # Start the tally at zero.
    my $found = 0;
    # A single feature data object is reused for every record.
    my $featureData = FeatureData->new($self);
    Trace("Starting feature loop.") if T(3);
    # Keep fetching until the query returns a false value.
    while (1) {
        my $row = $query->Fetch();
        last if ! $row;
        # Load the record into the feature data object, then emit it.
        $featureData->Store($row);
        $self->PutFeature($featureData);
        $found += 1;
    }
    # Report how many records were processed.
    return $found;
}
234 :    
235 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3