[Bio] / Sprout / ERDBFinder.pm Repository:
ViewVC logotype

Annotation of /Sprout/ERDBFinder.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package ERDBFinder;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use ERDB;
25 :     use Data::Dumper;
26 :    
27 :    
28 :     =head1 ERDBFinder Package
29 :    
30 :     =head2 Introduction
31 :    
32 :     This object is used to convert a list of criteria to a list of database objects.
33 :     The objects returned will all be of the same type, and it must be an entity type.
34 :    
35 :     The criteria are coded as n-tuples. Each n-tuple consists of a logical
36 :     operator (C<AND> or C<NOT>), a criterion name, and zero or more parameter
37 :     values.
38 :    
39 :     The criterion names are interpreted by a hash that is passed in to the object
40 :     constructor. For each criterion name, the hash specifies an I<object name string>
41 :     and a I<filter string>. The type of the desired entity objects is put in front of the
42 :     object name string, and the object name string, filter string, and criteria parameters
43 :     are all passed into the [[ErdbPm#Get]] function to return the desired objects. For
44 :     example, consider a search for C<Feature> objects, and we want to be able to search
45 :     on EC number. The criterion definition for C<EC number> would be something like this:
46 :    
47 :     'EC number' => { objects => 'IsRoleOf HasRoleEC',
48 :     filter => 'HasRoleEC(to-link) = ?' }
49 :    
50 :     If the incoming criteria tuple is
51 :    
52 :     ['AND', 'EC number', '2.7.6.3']
53 :    
54 :     then the ultimate C<Get> call is
55 :    
56 :     $erdb->Get('Feature IsRoleOf HasRoleEC', 'HasRoleEC(to-link) = ?', ['2.7.6.3']);
57 :    
58 :     Since we are looking for features, C<Feature> is automatically put at the beginning of
59 :     the object name list.
60 :    
61 :     =head2 Object Definition
62 :    
63 :     The fields in this object are as follows.
64 :    
65 :     =over 4
66 :    
67 :     =item entityType
68 :    
69 :     The type of entity being sought by this finder object (e.g. C<Feature>, C<Subsystem>).
70 :    
71 :     =item erdb
72 :    
73 :     [[ErdbPm]] database object to be used to get the data.
74 :    
75 :     =item fieldHash
76 :    
77 :     Reference to a hash keyed on incoming field name. For each field, the value is a
78 :     sub-hash with two string fields: C<objects> contains the object name string and
79 :     C<filter> contains the filter clause string. The object name string and the
80 :     filter clause string are combined with incoming parameters to create an
81 :     [[ErdbPm#Get]] query that returns the desired objects.
82 :    
83 :     =back
84 :    
85 :     =cut
86 :    
=head3 new

    my $ff = ERDBFinder->new($erdb, $entityType, \%fieldHash);

Construct a new ERDBFinder object for the specified database.

=over 4

=item erdb

Database object that will be used to run the queries. It is stored in the
finder's C<erdb> field.

=item entityType

Name of the entity type being sought (e.g. C<Feature>). It is stored in the
finder's C<entityType> field.

=item fieldHash

Reference to a hash keyed on criterion name. Each value is a sub-hash with an
C<objects> string and a C<filter> string. The reference is stored as-is (not
copied) in the finder's C<fieldHash> field.

=item RETURN

Returns the new finder object, blessed into the invoking class.

=back

=cut

sub new {
    # Unpack the invocant and the three constructor arguments.
    my ($class, $erdb, $entityType, $fieldHash) = @_;
    # At trace level 4, dump the criterion definitions for debugging.
    if (T(4)) {
        Trace("Criterion hash:\n" . Data::Dumper::Dumper($fieldHash));
    }
    # Build the object, bless it into the requested class, and hand it back.
    return bless {
        erdb       => $erdb,
        entityType => $entityType,
        fieldHash  => $fieldHash,
    }, $class;
}
113 :    
114 :     =head2 Public Methods
115 :    
=head3 Find

    my %results = $ff->Find($criteria);

Use the incoming criteria to build a hash of database objects for this
finder's entity type. The work is done by calling L</PeelQuery> repeatedly
and intersecting the result sets.

=over 4

=item criteria

A reference to a list of search criteria. Each element in the list is an n-tuple
consisting of a logical operator (C<AND> or C<NOT>), a criterion name that
matches one of the keys of the field hash passed to the constructor, and zero
or more parameter values. The list is handed directly to L</PeelQuery>, which
may modify it; see that method for what is left in the list afterward.

=item RETURN

Returns a hash of objects keyed on entity ID. An object is only kept if it was
returned by every query that L</PeelQuery> reported as executed. If the very
first call executes no query, the hash is empty.

=back

=cut

sub Find {
    # Unpack the arguments.
    my ($self, $criteria) = @_;
    # This hash accumulates the surviving objects, keyed on entity ID.
    my %retVal;
    Trace("ERDBFinder now finding.") if T(3);
    # The first query seeds the result set.
    my $found = $self->PeelQuery($criteria, \%retVal);
    # Each later query narrows the result set. We stop as soon as a call
    # executes no query or nothing is left to narrow.
    while ($found && keys %retVal) {
        # Collect the next query's results separately.
        my %buffer;
        $found = $self->PeelQuery($criteria, \%buffer);
        # No query was run, so there is nothing to merge; the loop test ends us.
        next if ! $found;
        # AND merge: drop every current key that the new query did not return.
        delete @retVal{ grep { ! exists $buffer{$_} } keys %retVal };
    }
    # Return the found objects.
    return %retVal;
}
172 :    
=head3 PeelQuery

    my $found = $ff->PeelQuery($criteria, \%buffer);

Use as many criteria as possible to create a single query and store its
results in the specified buffer. This method returns TRUE if a query was
executed and FALSE otherwise. It can therefore be called repeatedly until it
returns FALSE, with the results merged by the calling process.

Criteria are examined from the end of the list toward the front. A criterion
is used if its operator is C<AND> or C<NOT>, its name is a key of the field
hash, and its object names can be attached to the object name string built so
far. The object names can be attached when one of the following is true.

=over 4

=item 1

The criterion has no object names, or its object names already appear (as whole
names) in the current object name string.

=item 2

The last name in the current object name string is the target entity type.

=item 3

The target entity type is among the values returned by the database object's
C<GetRelationshipEntities> method for the last name in the current object
name string.

=back

=over 4

=item criteria

A reference to a list of search criteria. Each element in the list is an n-tuple
consisting of a logical operator (C<AND> or C<NOT>), a criterion name that
matches one of the keys of the field hash passed to the constructor, and zero
or more parameter values. Criteria used to form the query are removed from
the list. Everything else (unknown operators, unknown criterion names,
malformed entries, and criteria that could not be joined into this query) is
left in the list in its original relative order, so it can be retried by a
later call or processed separately.

=item buffer

A reference to a hash into which the results of the query will be stored. The
hash will be keyed on object ID and the value will be the object returned by
the query for this finder's target type with the specified ID.

=item RETURN

Returns TRUE if at least one criterion was used to make a query, else FALSE.

=back

An error is raised (via C<Confess>) if the number of parameter marks (C<?>) in
a criterion's filter string differs from the number of parameter values in
the tuple. In that case the criteria list has not yet been modified.

=cut

sub PeelQuery {
    # Get the parameters.
    my ($self, $criteria, $buffer) = @_;
    Trace("Incoming criteria are\n" . Data::Dumper::Dumper($criteria)) if T(4);
    # Declare the return variable. We'll set it to TRUE if we use a criterion.
    my $retVal = 0;
    # Without a criteria list there is nothing we can do.
    return $retVal if ref($criteria) ne 'ARRAY';
    # We need to accumulate a filter clause list and a parameter list for
    # the eventual query.
    my @filters;
    my @parms;
    # We also need an object name string. This begins with the target entity
    # type name.
    my $entityType = $self->{entityType};
    my $objectNames = $entityType;
    # The criteria we can't use in this query are saved here (in the order
    # examined) so they can be put back into the caller's list afterward.
    my @unused;
    # Loop through the criterion list from last to first. We work on a
    # reversed copy, so the caller's list is untouched if we fail part-way.
    for my $entry (reverse @$criteria) {
        # Grab the criterion data. A malformed entry produces no data and is
        # therefore treated as unrecognized below.
        my ($operator, $fieldName, @newParms) =
            (ref($entry) eq 'ARRAY' ? @$entry : ());
        # Look for this field name in the field hash, but only if the operator
        # is one we understand.
        my $fieldDescriptor;
        if (defined $operator && defined $fieldName &&
                ($operator eq 'AND' || $operator eq 'NOT')) {
            Trace("Processing \"$fieldName\" for $operator.") if T(3);
            $fieldDescriptor = $self->{fieldHash}->{$fieldName};
        }
        # Skip this criterion if we don't recognize it.
        if (! $fieldDescriptor) {
            push @unused, $entry;
            next;
        }
        # We found it. Get the data from the descriptor.
        my $newObjectNames = $fieldDescriptor->{objects} || "";
        my $newFilterClause = $fieldDescriptor->{filter};
        Trace("New object name string is \"$newObjectNames\".") if T(4);
        # Before we go too far, we need to do an error check. Does the number of
        # parameter marks match the number of parameters? (With an empty
        # replacement list, tr simply counts the matching characters.)
        my $markCount = ($newFilterClause =~ tr/?//);
        Confess("Invalid parameter specification for $fieldName.")
            if ($markCount != scalar(@newParms));
        # Now we check whether the object names are compatible with what we
        # have so far. We'll set this flag to TRUE if we want to use this
        # criterion.
        my $okToUse = 0;
        if (! $newObjectNames ||
                index(" $objectNames ", " $newObjectNames ") >= 0) {
            # Here we have an easy case. We can use the criterion without
            # modifying the object name string. The padding spaces force the
            # match onto whole names, so that "HasRole" is not found inside
            # "HasRoleEC".
            $okToUse = 1;
        } else {
            # Here we have a more complicated case. We need to know if we can
            # add the new object names to the end of the current list, which
            # depends on the last name currently in the list.
            my ($lastGuy) = ($objectNames =~ /(\S+)\s*$/);
            if (! defined $lastGuy) {
                # The object name string is empty, so we can't chain anything.
                $okToUse = 0;
            } elsif ($lastGuy eq $entityType) {
                # Here we're okay, because the last thing in the list is our
                # target.
                $okToUse = 1;
            } elsif (grep { $_ eq $entityType }
                        $self->{erdb}->GetRelationshipEntities($lastGuy)) {
                # Here we're okay as well: the target type is one of the
                # entities reported for the last name in the list.
                $okToUse = 1;
            }
            # Are we going to be able to use this criterion?
            if ($okToUse) {
                # Yes. Add our new object names to the object name string.
                $objectNames .= " $newObjectNames";
                Trace("Updated object name string is \"$objectNames\".") if T(4);
            }
        }
        # Now we know whether or not we can use this criterion.
        if (! $okToUse) {
            # We can't, so save it for a later query.
            push @unused, $entry;
            next;
        }
        # Now we must add the filter clause for this criterion.
        if ($operator eq 'NOT') {
            $newFilterClause = "NOT ($newFilterClause)";
        }
        push @filters, "($newFilterClause)";
        push @parms, @newParms;
        # Denote we've found at least one criterion.
        $retVal = 1;
    }
    # Put the criteria we didn't use back into the caller's list. They were
    # collected back-to-front, so reversing restores the original order.
    @$criteria = reverse @unused;
    # Can we make a query?
    if ($retVal) {
        # Yes. Organize the filter strings.
        my $filter = join(" AND ", @filters);
        Trace("Filter = \"$filter\" with " . scalar(@parms) . " parameters.") if T(3);
        my $query = $self->{erdb}->Get($objectNames, $filter, \@parms);
        # Loop through the results, storing them in the return hash by ID.
        while (my $object = $query->Fetch()) {
            my $id = $object->PrimaryValue("${entityType}(id)");
            $buffer->{$id} = $object;
        }
    }
    # Return the result.
    return $retVal;
}
310 :    
311 :    
312 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3