[Bio] / Sprout / SHWordSearch.pm Repository:
ViewVC logotype

Annotation of /Sprout/SHWordSearch.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package SHWordSearch;
4 :    
5 :     use strict;
6 :     use Tracer;
7 : parrello 1.11 use CGI qw(-nosticky);
8 : parrello 1.1 use HTML;
9 :     use Sprout;
10 : parrello 1.9 use RHFeatures;
11 :     use base 'SearchHelper';
12 : parrello 1.1
13 :     =head1 Simple Keyword Search Feature Search Helper
14 :    
15 :     =head2 Introduction
16 :    
17 :     This is a basic keyword search engine. Keyword searching is a subset of
18 :     the FidSearch mechanism, which allows filtering by keyword amongst a
19 :     host of other options; however, having a separate search class gives
20 :     new users a way to search without all the extra clutter.
21 :    
22 :     It has the following extra parameters.
23 :    
24 :     =over 4
25 :    
26 :     =item keywords
27 :    
28 :     Search expression. This is essentially a space-delimited list of words with the following
29 :     optional operators applied.
30 :    
31 : parrello 1.10 C<+>: A leading plus sign indicates that this word must be present in every row returned.
32 : parrello 1.1
33 :     C<->: A leading minus sign indicates that this word must not be present in any row returned.
34 :     Note that if every search term has a leading minus sign, nothing will match. This is an
35 :     artifact of the search algorithm.
36 :    
37 :     B<(no operator)>: By default (when neither + nor - is specified) the word is optional, but the
38 :     rows that contain it are rated higher.
39 :    
40 : parrello 1.10 C<< > >> C<< < >>: These two operators are used to change a word's contribution to the relevance value
41 : parrello 1.1 that is assigned to a row. The C<< > >> operator increases the contribution and the C<< < >>
42 :     operator decreases it.
43 :    
44 : parrello 1.10 C<( )>: Parentheses are used to group words into subexpressions. Parenthesized groups can be nested.
45 : parrello 1.1
46 :     C<~>: A leading tilde acts as a negation operator, causing the word's contribution to the row
47 :     relevance to be negative. It's useful for marking noise words. A row that contains such a
48 :     word is rated lower than others, but is not excluded altogether, as it would be with the C<->
49 : parrello 1.10 operator.
50 : parrello 1.1
51 :     C<*>: An asterisk is the truncation operator. Unlike the other operators, it should be appended to the word.
52 :    
53 :     C<">: A phrase that is enclosed within double quote characters matches only rows that contain the phrase
54 :     literally, as it was typed.
55 :    
56 :     =item group[]
57 :    
58 :     If specified, these should be the names of NMPDR groups to which the search is to be
59 :     restricted. Otherwise, all groups are searched. This parameter is not on the form; rather,
60 :     it is provided as a quick way to do keyword searches restricted to groups on pages that
61 :     want to provide that capability.
62 :    
63 :     =back
64 :    
65 :     =head2 Virtual Methods
66 :    
67 :     =head3 Form
68 :    
69 : parrello 1.10 my $html = $shelp->Form();
70 : parrello 1.1
71 :     Generate the HTML for a form to request a new search.
72 :    
73 :     =cut
74 :    
# Build the HTML for the keyword-search request form.
#
# The form table holds the keyword-expression row, the feature-filter option
# rows and the submit row. If the query carries one or more "group"
# parameters, they are re-emitted as hidden fields and a message naming the
# groups is appended as an extra row. Returns the form HTML as a string.
sub Form {
    # Get the parameters.
    my ($self) = @_;
    # Get the CGI query object.
    my $cgi = $self->Q();
    # Start the form.
    my $retVal = $self->FormStart("Keyword Search");
    # Declare a variable to hold the table rows.
    my @rows = ();
    # The first row is for the keyword search expression.
    push @rows, RHFeatures::WordSearchRow($self);
    # The second row is for special options.
    push @rows, RHFeatures::FeatureFilterFormRows($self, 'options');
    # The last row is for the submit button.
    push @rows, $self->SubmitRow();
    # Finally, if groups are specified, we include them as hidden fields and
    # display an explanation.
    my @groups = $cgi->param('group');
    if (@groups) {
        # The group names come straight from the query string and the message
        # is placed in a table cell as-is, so HTML-escape them first.
        my @names = map { CGI::escapeHTML($_) } @groups;
        # The explanation format is a bit tricky because of the way the English
        # language uses commas and conjunctions: "A.", "A and B.", "A, B and C."
        my $last = pop @names;
        my $message = "Search restricted to ";
        if (@names) {
            $message .= join(", ", @names) . " and $last.";
        } else {
            $message .= "$last.";
        }
        # Assemble the hidden fields from the raw (unescaped) group names.
        my @hiddens = map { CGI::hidden(-name => 'group', -value => $_) } @groups;
        push @rows, CGI::Tr(CGI::td(@hiddens), CGI::td($message));
    }
    # Create the table.
    $retVal .= $self->MakeTable(\@rows);
    # Close the form.
    $retVal .= $self->FormEnd();
    # Return the result.
    return $retVal;
}
116 :    
117 :     =head3 Find
118 :    
119 : parrello 1.10 my $resultCount = $shelp->Find();
120 : parrello 1.1
121 :     Conduct a search based on the current CGI query parameters. The search results will
122 :     be written to the session cache file and the number of results will be
123 :     returned. If the search parameters are invalid, a result count of C<undef> will be
124 :     returned and a result message will be stored in this object describing the problem.
125 :    
126 :     =cut
127 :    
# Run the keyword search described by the current CGI query parameters.
#
# When the result helper reports the parameters as valid, the search is run
# (once per "group" parameter, or once over all features when no group is
# given) and every query is passed to ProcessQuery. Returns the total number
# of results, or undef when validation fails.
sub Find {
    my ($self) = @_;
    # Get the CGI and Sprout objects.
    my $cgi = $self->Q();
    my $sprout = $self->DB();
    # Declare the return variable. If it remains undefined, the caller will
    # know that an error occurred.
    my $retVal;
    # Get the result helper.
    my $rhelp = RHFeatures->new($self);
    # Validate the filtering parameters.
    $rhelp->KeywordsRequired();
    if ($rhelp->Valid()) {
        # Initialize the result counter.
        $retVal = 0;
        # Get the default columns.
        $self->DefaultColumns($rhelp);
        Trace("Column list is " . join(", ", @{$rhelp->GetColumnHeaders()})) if T(3);
        # Start the output session.
        $self->OpenSession($rhelp);
        # Get the keywords.
        my $keywords = $cgi->param('keywords') || '';
        # Check for groups.
        my @groups = $cgi->param('group');
        if (@groups) {
            # Here we do the search a group at a time.
            for my $group (@groups) {
                Trace("Starting the search.") if T(3);
                # The group name is raw query input and the progress line
                # carries HTML markup, so escape the name for display only.
                # The query filter below still receives the raw value.
                my $groupHtml = CGI::escapeHTML($group);
                $self->PrintLine("Submitting search query for $groupHtml.<br />");
                my $query = $sprout->Search($keywords, 0, ['Feature', 'IsInGenome', 'Genome'],
                                            "Genome(primary-group) = ?", [$group]);
                Trace("Processing results.") if T(3);
                $retVal += $self->ProcessQuery($rhelp, $query);
                Trace("Results processed.") if T(3);
            }
        } else {
            # Here we do one search just for features.
            Trace("Starting the search.") if T(3);
            $self->PrintLine("Submitting search query for all genomes.<br />");
            my $query = $sprout->Search($keywords, 0, ['Feature']);
            Trace("Processing results.") if T(3);
            $retVal += $self->ProcessQuery($rhelp, $query);
            Trace("Results processed.") if T(3);
        }
        # Close the session file.
        $self->CloseSession();
        Trace("Session closed.") if T(3);
    }
    # Return the result count.
    return $retVal;
}
179 :    
180 : parrello 1.9 =head3 SearchTitle
181 :    
182 : parrello 1.10 my $titleHtml = $shelp->SearchTitle();
183 : parrello 1.9
184 :     Return the display title for this search. The display title appears above the search results.
185 :     If no result is returned, no title will be displayed. The result should be an html string
186 :     that can be legally put inside a block tag such as C<h3> or C<p>.
187 :    
188 :     =cut
189 :    
# Build the HTML title displayed above the search results.
#
# The title embeds the "keywords" query parameter. Because that value is
# user-supplied and the result is used as HTML, it is HTML-escaped before
# being interpolated. A missing parameter is treated as an empty string.
# Returns the title string.
sub SearchTitle {
    # Get the parameters.
    my ($self) = @_;
    # Get the keywords from the query.
    my $cgi = $self->Q();
    my $words = $cgi->param('keywords');
    # Avoid an "uninitialized value" warning when no keywords were submitted.
    $words = '' if ! defined $words;
    # Escape HTML metacharacters so the keywords cannot inject markup.
    my $safeWords = CGI::escapeHTML($words);
    # Compute and return the title.
    my $retVal = "Keyword Search for $safeWords.";
    return $retVal;
}
200 :    
201 : parrello 1.1 =head3 Description
202 :    
203 : parrello 1.10 my $htmlText = $shelp->Description();
204 : parrello 1.1
205 :     Return a description of this search. The description is used for the table of contents
206 :     on the main search tools page. It may contain HTML, but it should be character-level,
207 :     not block-level, since the description is going to appear in a list.
208 :    
209 :     =cut
210 :    
# Return the one-line description of this search. The text is a fixed string
# containing wiki-style markup; the invocant is not consulted.
sub Description {
    my ($self) = @_;
    # Single quotes: the text is a literal, nothing in it is interpolated.
    my $retVal = 'Search for %FIG{genes}% based on [[FIG.KeywordBox][keywords]].';
    return $retVal;
}
217 :    
218 :     =head3 ProcessQuery
219 :    
220 : parrello 1.10 my $count = $shelp->ProcessQuery($rhelp, $query);
221 : parrello 1.1
222 :     Run through the results of a query, sending all the features retrieved to the output
223 :     cache. The number of features found will be returned to the caller.
224 :    
225 :     =over 4
226 :    
227 : parrello 1.9 =item rhelp
228 :    
229 :     Current result helper object, which should be B<RHFeatures>.
230 :    
231 : parrello 1.1 =item query
232 :    
233 : parrello 1.11 An B<ERDBQuery> object that returns features.
234 : parrello 1.1
235 :     =item RETURN
236 :    
237 :     Returns the number of features found.
238 :    
239 :     =back
240 :    
241 :     =cut
242 :    
# Send every record returned by a query to the result helper.
#
# Parameters:
#   rhelp   result helper object; its SortKey and PutData methods are used
#   query   query object; records are pulled with Fetch until it returns false
#
# A progress line is printed after every 100 records and a summary line at
# the end. Returns the number of records processed.
sub ProcessQuery {
    # Get the parameters.
    my ($self, $rhelp, $query) = @_;
    # Clear the result counter.
    my $retVal = 0;
    $self->PrintLine("Processing query results.<br />");
    Trace("Starting feature loop.") if T(3);
    # Loop through all the records returned by the query.
    while (my $record = $query->Fetch()) {
        # Compute the sort key.
        my $sort = $rhelp->SortKey($record);
        # Store this feature under its ID.
        $rhelp->PutData($sort, $record->PrimaryValue('Feature(id)'), $record);
        # Increment the result counter and report progress every 100 records.
        $retVal++;
        if ($retVal % 100 == 0) {
            $self->PrintLine("$retVal results processed.<br />");
        }
    }
    $self->PrintLine("Results found: $retVal.<br />");
    # Return the counter.
    return $retVal;
}
267 :    
268 : parrello 1.10 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3