[Bio] / Sprout / SHWordSearch.pm Repository:
ViewVC logotype

Annotation of /Sprout/SHWordSearch.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package SHWordSearch;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use SearchHelper;
8 :     use CGI;
9 :     use HTML;
10 :     use Sprout;
11 :     use FeatureData;
12 :     use FeatureQuery;
13 :    
14 :     our @ISA = qw(SearchHelper);
15 :    
16 :     =head1 Simple Keyword Search Feature Search Helper
17 :    
18 :     =head2 Introduction
19 :    
20 :     This is a basic keyword search engine. Keyword searching is a subset of
21 :     the FidSearch mechanism, which allows filtering by keyword amongst a
22 :     host of other options; however, having a separate search class gives
23 :     new users a way to search without all the extra clutter.
24 :    
25 :     It has the following extra parameters.
26 :    
27 :     =over 4
28 :    
29 :     =item keywords
30 :    
31 :     Search expression. This is essentially a space-delimited list of words with the following
32 :     optional operators applied.
33 :    
34 :     C<+>: A leading plus sign indicates that this word must be present in every row returned.
35 :    
36 :     C<->: A leading minus sign indicates that this word must not be present in any row returned.
37 :     Note that if every search term has a leading minus sign, nothing will match. This is an
38 :     artifact of the search algorithm.
39 :    
40 :     B<(no operator)>: By default (when neither + nor - is specified) the word is optional, but the
41 :     rows that contain it are rated higher.
42 :    
43 :     C<< > < >>: These two operators are used to change a word's contribution to the relevance value
44 :     that is assigned to a row. The C<< > >> operator increases the contribution and the C<< < >>
45 :     operator decreases it.
46 :    
47 :     C<( )>: Parentheses are used to group words into subexpressions. Parenthesized groups can be nested.
48 :    
49 :     C<~>: A leading tilde acts as a negation operator, causing the word's contribution to the row
50 :     relevance to be negative. It's useful for marking noise words. A row that contains such a
51 :     word is rated lower than others, but is not excluded altogether, as it would be with the C<->
52 :     operator.
53 :    
54 :     C<*>: An asterisk is the truncation operator. Unlike the other operators, it should be appended to the word.
55 :    
56 :     C<">: A phrase that is enclosed within double quote characters matches only rows that contain the phrase
57 :     literally, as it was typed.
58 :    
59 :     =item group[]
60 :    
61 :     If specified, these should be the names of NMPDR groups to which the search is to be
62 :     restricted. Otherwise, all groups are searched. This parameter is not on the form; rather,
63 :     it is provided as a quick way to do keyword searches restricted to groups on pages that
64 :     want to provide that capability.
65 :    
66 :     =back
67 :    
68 :     =head2 Virtual Methods
69 :    
70 :     =head3 Form
71 :    
72 :     C<< my $html = $shelp->Form(); >>
73 :    
74 :     Generate the HTML for a form to request a new search.
75 :    
76 :     =cut
77 :    
sub Form {
    # Build and return the HTML for the keyword search form.
    #
    # Parameters: $self - this search helper.
    # Returns:    the form HTML as a single string.
    my ($self) = @_;
    # Get the CGI object used for parameter access and HTML generation.
    my $cgi = $self->Q();
    # NOTE(review): the database handle is not used by this method. The call is
    # retained only in case DB() has initialization side effects -- confirm and
    # remove if it does not.
    $self->DB();
    # Start the form.
    my $retVal = $self->FormStart("Keyword Search");
    # Declare a variable to hold the table rows.
    my @rows = ();
    # The first row is for the keyword search expression. It is pre-filled with
    # the expression from the current request, if any.
    my $expressionString = $cgi->param('keywords') || "";
    push @rows, $cgi->Tr($cgi->td("Search Words"),
                         $cgi->td($cgi->textfield(-name => 'keywords',
                                                  -value => $expressionString,
                                                  -size => 40)),
                         $cgi->td($cgi->submit(-name => 'Alternate',
                                               -value => 'Advanced',
                                               -class => 'button')),
                        );
    # The other row is for the submit button.
    push @rows, $self->SubmitRow();
    # Finally, if groups are specified, we include them as hidden fields and display
    # an explanation.
    my @groups = $cgi->param('group');
    if (@groups) {
        # The group names come straight from the request, so they must be
        # HTML-escaped before being placed in the visible message.
        my @displayNames = map { $cgi->escapeHTML($_) } @groups;
        # The explanation format is a bit tricky because of the way the English language
        # uses commas and conjunctions: "A.", "A and B.", "A, B and C.".
        my $last = pop @displayNames;
        my $message = "Search restricted to ";
        if (@displayNames) {
            $message .= join(", ", @displayNames) . " and $last.";
        } else {
            $message .= "$last.";
        }
        # Assemble the hidden fields with a single call. CGI.pm's hidden() reuses
        # every existing value of the named parameter unless -override is set, so
        # calling it once per group would emit the full group list each time.
        my @hiddens = $cgi->hidden(-name => 'group', -default => \@groups,
                                   -override => 1);
        push @rows, $cgi->Tr($cgi->td(@hiddens), $cgi->td($message));
    }
    # Create the table.
    $retVal .= $self->MakeTable(\@rows);
    # Close the form.
    $retVal .= $self->FormEnd();
    # Return the result.
    return $retVal;
}
125 :    
126 :     =head3 Find
127 :    
128 :     C<< my $resultCount = $shelp->Find(); >>
129 :    
130 :     Conduct a search based on the current CGI query parameters. The search results will
131 :     be written to the session cache file and the number of results will be
132 :     returned. If the search parameters are invalid, a result count of C<undef> will be
133 :     returned and a result message will be stored in this object describing the problem.
134 :    
135 :     =cut
136 :    
sub Find {
    # Run the keyword search described by the current CGI parameters.
    #
    # Parameters: $self - this search helper.
    # Returns:    the number of features processed, or undef if the keywords
    #             are invalid (in which case a message is set on this object).
    my ($self) = @_;
    # Get the CGI and Sprout objects.
    my $cgi = $self->Q();
    my $sprout = $self->DB();
    # Declare the return variable. If it remains undefined, the caller will
    # know that an error occurred.
    my $retVal;
    # Get the keyword list. We need to prove that there is at least one keyword
    # that does not begin with a "-". Splitting on ' ' ignores leading
    # whitespace, so no empty pseudo-word is produced.
    my $keywords = $cgi->param('keywords') || "";
    my @wordList = split ' ', $keywords;
    my @plusWords = grep { $_ =~ /^[^\-]/ } @wordList;
    if (! @wordList) {
        $self->SetMessage("No search words specified.");
    } elsif (! @plusWords) {
        $self->SetMessage("At least one keyword must be positive. All the keywords entered are preceded by minus signs.");
    } else {
        # We have good keywords. Initialize the session file.
        $self->OpenSession();
        # Initialize the result counter.
        $retVal = 0;
        # Check for groups. Repeated group values are collapsed (keeping first-seen
        # order); otherwise the identical search would run again, writing the same
        # results a second time and inflating the count.
        my %seen;
        my @groups = grep { ! $seen{$_}++ } $cgi->param('group');
        if (@groups) {
            # Here we do the search a group at a time.
            for my $group (@groups) {
                my $query = $sprout->Search($keywords, 0, ['Feature', 'HasFeature', 'Genome'],
                                            "Genome(primary-group) = ?", [$group]);
                $retVal += $self->ProcessQuery($query);
            }
        } else {
            # Here we do one search just for features.
            my $query = $sprout->Search($keywords, 0, ['Feature']);
            $retVal += $self->ProcessQuery($query);
        }
        # Close the session file.
        $self->CloseSession();
    }
    # Return the result count.
    return $retVal;
}
179 :    
180 :     =head3 Description
181 :    
182 :     C<< my $htmlText = $shelp->Description(); >>
183 :    
184 :     Return a description of this search. The description is used for the table of contents
185 :     on the main search tools page. It may contain HTML, but it should be character-level,
186 :     not block-level, since the description is going to appear in a list.
187 :    
188 :     =cut
189 :    
sub Description {
    # The invocant is accepted for interface uniformity; it is not consulted.
    my $self = shift;
    # The description is a fixed, character-level string.
    my $summary = "Search for features based on keywords.";
    return $summary;
}
196 :    
197 :     =head3 ProcessQuery
198 :    
199 :     C<< my $count = $shelp->ProcessQuery($query); >>
200 :    
201 :     Run through the results of a query, sending all the features retrieved to the output
202 :     cache. The number of features found will be returned to the caller.
203 :    
204 :     =over 4
205 :    
206 :     =item query
207 :    
208 :     A B<DBQuery> object that returns features.
209 :    
210 :     =item RETURN
211 :    
212 :     Returns the number of features found.
213 :    
214 :     =back
215 :    
216 :     =cut
217 :    
sub ProcessQuery {
    # Unpack the search helper and the query whose results are to be drained.
    my $self = shift;
    my $query = shift;
    # One feature data object is created up front and reused for every record.
    my $featureData = FeatureData->new($self);
    Trace("Starting feature loop.") if T(3);
    # Tally of the records sent to the output.
    my $found = 0;
    # Keep fetching until the query yields a false value.
    while (1) {
        my $row = $query->Fetch();
        last if ! $row;
        # Load the record into the feature data object, then emit it.
        $featureData->Store($row);
        $self->PutFeature($featureData);
        $found++;
    }
    # Hand back the number of records processed.
    return $found;
}
238 :    
239 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3