2 |
|
|
3 |
use strict; |
use strict; |
4 |
use Tracer; |
use Tracer; |
5 |
use DBKernel; |
use DBrtns; |
6 |
use Data::Dumper; |
use Data::Dumper; |
7 |
use XML::Simple; |
use XML::Simple; |
8 |
use DBQuery; |
use DBQuery; |
9 |
use DBObject; |
use DBObject; |
10 |
use Stats; |
use Stats; |
11 |
use Time::HiRes qw(gettimeofday); |
use Time::HiRes qw(gettimeofday); |
12 |
|
use FIG; |
13 |
|
|
14 |
=head1 Entity-Relationship Database Package |
=head1 Entity-Relationship Database Package |
15 |
|
|
33 |
relation that contains two fields-- the feature ID (C<id>) and the alias name (C<alias>). |
relation that contains two fields-- the feature ID (C<id>) and the alias name (C<alias>). |
34 |
The B<FEATURE> entity also contains an optional virulence number. This is implemented |
The B<FEATURE> entity also contains an optional virulence number. This is implemented |
35 |
as a separate relation C<FeatureVirulence> which contains an ID (C<id>) and a virulence number |
as a separate relation C<FeatureVirulence> which contains an ID (C<id>) and a virulence number |
36 |
(C<virulence>). If the virulence of a feature I<ABC> is known to be 6, there will be one row in the |
(C<virulence>). If the virulence of a feature I<ABC> is known to be 6, there will be one row in |
37 |
C<FeatureVirulence> relation possessing the value I<ABC> as its ID and 6 as its virulence number. |
the C<FeatureVirulence> relation possessing the value I<ABC> as its ID and 6 as its virulence |
38 |
If the virulence of I<ABC> is not known, there will not be any rows for it in C<FeatureVirulence>. |
number. If the virulence of I<ABC> is not known, there will not be any rows for it in |
39 |
|
C<FeatureVirulence>. |
40 |
|
|
41 |
Entities are connected by binary relationships implemented using single relations possessing the |
Entities are connected by binary relationships implemented using single relations possessing the |
42 |
same name as the relationship itself and that has an I<arity> of 1-to-1 (C<11>), 1-to-many (C<1M>), |
same name as the relationship itself and that has an I<arity> of 1-to-1 (C<11>), 1-to-many (C<1M>), |
71 |
is described in the L</GenerateEntity> and L</GenerateConnection> methods, though it is not yet |
is described in the L</GenerateEntity> and L</GenerateConnection> methods, though it is not yet |
72 |
fully implemented. |
fully implemented. |
73 |
|
|
74 |
|
=head2 XML Database Description |
75 |
|
|
76 |
|
=head3 Data Types |
77 |
|
|
78 |
|
The ERDB system supports the following data types. Note that there are numerous string |
79 |
|
types depending on the maximum length. Some database packages limit the total number of |
80 |
|
characters you have in an index key; to insure the database works in all environments, |
81 |
|
the type of string should be the shortest one possible that supports all the known values. |
82 |
|
|
83 |
|
=over 4 |
84 |
|
|
85 |
|
=item char |
86 |
|
|
87 |
|
single ASCII character |
88 |
|
|
89 |
|
=item int |
90 |
|
|
91 |
|
32-bit signed integer |
92 |
|
|
93 |
|
=item date |
94 |
|
|
95 |
|
64-bit unsigned integer, representing a PERL date/time value |
96 |
|
|
97 |
|
=item text |
98 |
|
|
99 |
|
long string; Text fields cannot be used in indexes or sorting and do not support the |
100 |
|
normal syntax of filter clauses, but can be up to a billion character in length |
101 |
|
|
102 |
|
=item float |
103 |
|
|
104 |
|
double-precision floating-point number |
105 |
|
|
106 |
|
=item boolean |
107 |
|
|
108 |
|
single-bit numeric value; The value is stored as a 16-bit signed integer (for |
109 |
|
compatability with certain database packages), but the only values supported are |
110 |
|
0 and 1. |
111 |
|
|
112 |
|
=item key-string |
113 |
|
|
114 |
|
variable-length string, maximum 40 characters |
115 |
|
|
116 |
|
=item name-string |
117 |
|
|
118 |
|
variable-length string, maximum 80 characters |
119 |
|
|
120 |
|
=item medium-string |
121 |
|
|
122 |
|
variable-length string, maximum 160 characters |
123 |
|
|
124 |
|
=item string |
125 |
|
|
126 |
|
variable-length string, maximum 255 characters |
127 |
|
|
128 |
|
=back |
129 |
|
|
130 |
|
=head3 Global Tags |
131 |
|
|
132 |
|
The entire database definition must be inside a B<Database> tag. The display name of |
133 |
|
the database is given by the text associated with the B<Title> tag. The display name |
134 |
|
is only used in the automated documentation. It has no other effect. The entities and |
135 |
|
relationships are listed inside the B<Entities> and B<Relationships> tags, |
136 |
|
respectively. None of these tags have attributes. |
137 |
|
|
138 |
|
<Database> |
139 |
|
<Title>... display title here...</Title> |
140 |
|
<Entities> |
141 |
|
... entity definitions here ... |
142 |
|
</Entities> |
143 |
|
<Relationships> |
144 |
|
... relationship definitions here... |
145 |
|
</Relationships> |
146 |
|
</Database> |
147 |
|
|
148 |
|
Entities, relationships, indexes, and fields all allow a text tag called B<Notes>. |
149 |
|
The text inside the B<Notes> tag contains comments that will appear when the database |
150 |
|
documentation is generated. Within a B<Notes> tag, you may use C<[i]> and C<[/i]> for |
151 |
|
italics, C<[b]> and C<[/b]> for bold, and C<[p]> for a new paragraph. |
152 |
|
|
153 |
|
=head3 Fields |
154 |
|
|
155 |
|
Both entities and relationships have fields described by B<Field> tags. A B<Field> |
156 |
|
tag can have B<Notes> associated with it. The complete set of B<Field> tags for an |
157 |
|
object mus be inside B<Fields> tags. |
158 |
|
|
159 |
|
<Entity ... > |
160 |
|
<Fields> |
161 |
|
... Field tags ... |
162 |
|
</Fields> |
163 |
|
</Entity> |
164 |
|
|
165 |
|
The attributes for the B<Field> tag are as follows. |
166 |
|
|
167 |
|
=over 4 |
168 |
|
|
169 |
|
=item name |
170 |
|
|
171 |
|
Name of the field. The field name should contain only letters, digits, and hyphens (C<->), |
172 |
|
and the first character should be a letter. Most underlying databases are case-insensitive |
173 |
|
with the respect to field names, so a best practice is to use lower-case letters only. |
174 |
|
|
175 |
|
=item type |
176 |
|
|
177 |
|
Data type of the field. The legal data types are given above. |
178 |
|
|
179 |
|
=item relation |
180 |
|
|
181 |
|
Name of the relation containing the field. This should only be specified for entity |
182 |
|
fields. The ERDB system does not support optional fields or multi-occurring fields |
183 |
|
in the primary relation of an entity. Instead, they are put into secondary relations. |
184 |
|
So, for example, in the C<Genome> entity, the C<group-name> field indicates a special |
185 |
|
grouping used to select a subset of the genomes. A given genome may not be in any |
186 |
|
groups or may be in multiple groups. Therefore, C<group-name> specifies a relation |
187 |
|
value. The relation name specified must be a valid table name. By convention, it is |
188 |
|
usually the entity name followed by a qualifying word (e.g. C<GenomeGroup>). In an |
189 |
|
entity, the fields without a relation attribute are said to belong to the |
190 |
|
I<primary relation>. This relation has the same name as the entity itself. |
191 |
|
|
192 |
|
=back |
193 |
|
|
194 |
|
=head3 Indexes |
195 |
|
|
196 |
|
An entity can have multiple alternate indexes associated with it. The fields must |
197 |
|
be from the primary relation. The alternate indexes assist in ordering results |
198 |
|
from a query. A relationship can have up to two indexes-- a I<to-index> and a |
199 |
|
I<from-index>. These order the results when crossing the relationship. For |
200 |
|
example, in the relationship C<HasContig> from C<Genome> to C<Contig>, the |
201 |
|
from-index would order the contigs of a ganome, and the to-index would order |
202 |
|
the genomes of a contig. A relationship's index must specify only fields in |
203 |
|
the relationship. |
204 |
|
|
205 |
|
The indexes for an entity must be listed inside the B<Indexes> tag. The from-index |
206 |
|
of a relationship is specified using the B<FromIndex> tag; the to-index is specified |
207 |
|
using the B<ToIndex> tag. |
208 |
|
|
209 |
|
Each index can contain a B<Notes> tag. In addition, it will have an B<IndexFields> |
210 |
|
tag containing the B<IndexField> tags. These specify, in order, the fields used in |
211 |
|
the index. The attributes of an B<IndexField> tag are as follows. |
212 |
|
|
213 |
|
=over 4 |
214 |
|
|
215 |
|
=item name |
216 |
|
|
217 |
|
Name of the field. |
218 |
|
|
219 |
|
=item order |
220 |
|
|
221 |
|
Sort order of the field-- C<ascending> or C<descending>. |
222 |
|
|
223 |
|
=back |
224 |
|
|
225 |
|
The B<Index>, B<FromIndex>, and B<ToIndex> tags themselves have no attributes. |
226 |
|
|
227 |
|
=head3 Object and Field Names |
228 |
|
|
229 |
|
By convention entity and relationship names use capital casing (e.g. C<Genome> or |
230 |
|
C<HasRegionsIn>. Most underlying databases, however, are aggressively case-insensitive |
231 |
|
with respect to relation names, converting them internally to all-upper case or |
232 |
|
all-lower case. |
233 |
|
|
234 |
|
If syntax or parsing errors occur when you try to load or use an ERDB database, the |
235 |
|
most likely reason is that one of your objects has an SQL reserved word as its name. |
236 |
|
The list of SQL reserved words keeps increasing; however, most are unlikely to show |
237 |
|
up as a noun or declarative verb phrase. The exceptions are C<Group>, C<User>, |
238 |
|
C<Table>, C<Index>, C<Object>, C<Date>, C<Number>, C<Update>, C<Time>, C<Percent>, |
239 |
|
C<Memo>, C<Order>, and C<Sum>. This problem can crop up in field names as well. |
240 |
|
|
241 |
|
Every entity has a field called C<id> that acts as its primary key. Every relationship |
242 |
|
has fields called C<from-link> and C<to-link> that contain copies of the relevant |
243 |
|
entity IDs. These are essentially ERDB's reserved words, and should not be used |
244 |
|
for user-defined field names. |
245 |
|
|
246 |
|
=head3 Entities |
247 |
|
|
248 |
|
An entity is described by the B<Entity> tag. The entity can contain B<Notes>, an |
249 |
|
B<Indexes> tag containing one or more secondary indexes, and a B<Fields> tag |
250 |
|
containing one or more fields. The attributes of the B<Entity> tag are as follows. |
251 |
|
|
252 |
|
=over 4 |
253 |
|
|
254 |
|
=item name |
255 |
|
|
256 |
|
Name of the entity. The entity name, by convention, uses capital casing (e.g. C<Genome> |
257 |
|
or C<GroupBlock>) and should be a noun or noun phrase. |
258 |
|
|
259 |
|
=item keyType |
260 |
|
|
261 |
|
Data type of the primary key. The primary key is always named C<id>. |
262 |
|
|
263 |
|
=back |
264 |
|
|
265 |
|
=head3 Relationships |
266 |
|
|
267 |
|
A relationship is described by the C<Relationship> tag. Within a relationship, |
268 |
|
there can be a C<Notes> tag, a C<Fields> tag containing the intersection data |
269 |
|
fields, a C<FromIndex> tag containing the from-index, and a C<ToIndex> tag containing |
270 |
|
the to-index. |
271 |
|
|
272 |
|
The C<Relationship> tag has the following attributes. |
273 |
|
|
274 |
|
=over 4 |
275 |
|
|
276 |
|
=item name |
277 |
|
|
278 |
|
Name of the relationship. The relationship name, by convention, uses capital casing |
279 |
|
(e.g. C<ContainsRegionIn> or C<HasContig>), and should be a declarative verb |
280 |
|
phrase, designed to fit between the from-entity and the to-entity (e.g. |
281 |
|
Block C<ContainsRegionIn> Genome). |
282 |
|
|
283 |
|
=item from |
284 |
|
|
285 |
|
Name of the entity from which the relationship starts. |
286 |
|
|
287 |
|
=item to |
288 |
|
|
289 |
|
Name of the entity to which the relationship proceeds. |
290 |
|
|
291 |
|
=item arity |
292 |
|
|
293 |
|
Relationship type: C<1M> for one-to-many and C<MM> for many-to-many. |
294 |
|
|
295 |
|
=back |
296 |
|
|
297 |
=cut |
=cut |
298 |
|
|
299 |
# GLOBALS |
# GLOBALS |
301 |
# Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string. |
# Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string. |
302 |
# "maxLen" is the maximum permissible length of the incoming string data used to populate a field |
# "maxLen" is the maximum permissible length of the incoming string data used to populate a field |
303 |
# of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation |
# of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation |
304 |
#string is specified in the field definition. |
# string is specified in the field definition. "avgLen" is the average byte length for estimating |
305 |
my %TypeTable = ( char => { sqlType => 'CHAR(1)', maxLen => 1, dataGen => "StringGen('A')" }, |
# record sizes. |
306 |
int => { sqlType => 'INTEGER', maxLen => 20, dataGen => "IntGen(0, 99999999)" }, |
my %TypeTable = ( char => { sqlType => 'CHAR(1)', maxLen => 1, avgLen => 1, dataGen => "StringGen('A')" }, |
307 |
string => { sqlType => 'VARCHAR(255)', maxLen => 255, dataGen => "StringGen(IntGen(10,250))" }, |
int => { sqlType => 'INTEGER', maxLen => 20, avgLen => 4, dataGen => "IntGen(0, 99999999)" }, |
308 |
text => { sqlType => 'TEXT', maxLen => 1000000000, dataGen => "StringGen(IntGen(80,1000))" }, |
string => { sqlType => 'VARCHAR(255)', maxLen => 255, avgLen => 100, dataGen => "StringGen(IntGen(10,250))" }, |
309 |
date => { sqlType => 'BIGINT', maxLen => 80, dataGen => "DateGen(-7, 7, IntGen(0,1400))" }, |
text => { sqlType => 'TEXT', maxLen => 1000000000, avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" }, |
310 |
float => { sqlType => 'DOUBLE PRECISION', maxLen => 40, dataGen => "FloatGen(0.0, 100.0)" }, |
date => { sqlType => 'BIGINT', maxLen => 80, avgLen => 8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" }, |
311 |
boolean => { sqlType => 'SMALLINT', maxLen => 1, dataGen => "IntGen(0, 1)" }, |
float => { sqlType => 'DOUBLE PRECISION', maxLen => 40, avgLen => 8, dataGen => "FloatGen(0.0, 100.0)" }, |
312 |
|
boolean => { sqlType => 'SMALLINT', maxLen => 1, avgLen => 1, dataGen => "IntGen(0, 1)" }, |
313 |
'key-string' => |
'key-string' => |
314 |
{ sqlType => 'VARCHAR(40)', maxLen => 40, dataGen => "StringGen(IntGen(10,40))" }, |
{ sqlType => 'VARCHAR(40)', maxLen => 40, avgLen => 10, dataGen => "StringGen(IntGen(10,40))" }, |
315 |
'name-string' => |
'name-string' => |
316 |
{ sqlType => 'VARCHAR(80)', maxLen => 80, dataGen => "StringGen(IntGen(10,80))" }, |
{ sqlType => 'VARCHAR(80)', maxLen => 80, avgLen => 40, dataGen => "StringGen(IntGen(10,80))" }, |
317 |
'medium-string' => |
'medium-string' => |
318 |
{ sqlType => 'VARCHAR(160)', maxLen => 160, dataGen => "StringGen(IntGen(10,160))" }, |
{ sqlType => 'VARCHAR(160)', maxLen => 160, avgLen => 40, dataGen => "StringGen(IntGen(10,160))" }, |
319 |
); |
); |
320 |
|
|
321 |
# Table translating arities into natural language. |
# Table translating arities into natural language. |
371 |
|
|
372 |
=head3 ShowMetaData |
=head3 ShowMetaData |
373 |
|
|
374 |
C<< $database->ShowMetaData($fileName); >> |
C<< $erdb->ShowMetaData($fileName); >> |
375 |
|
|
376 |
This method outputs a description of the database. This description can be used to help users create |
This method outputs a description of the database. This description can be used to help users create |
377 |
the data to be loaded into the relations. |
the data to be loaded into the relations. |
508 |
# Separate out the source, the target, and the join clause. |
# Separate out the source, the target, and the join clause. |
509 |
$joinKey =~ m!^([^/]+)/(.+)$!; |
$joinKey =~ m!^([^/]+)/(.+)$!; |
510 |
my ($sourceRelation, $targetRelation) = ($1, $2); |
my ($sourceRelation, $targetRelation) = ($1, $2); |
511 |
Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(4); |
Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(Joins => 4); |
512 |
my $source = $self->ComputeObjectSentence($sourceRelation); |
my $source = $self->ComputeObjectSentence($sourceRelation); |
513 |
my $target = $self->ComputeObjectSentence($targetRelation); |
my $target = $self->ComputeObjectSentence($targetRelation); |
514 |
my $clause = $joinTable->{$joinKey}; |
my $clause = $joinTable->{$joinKey}; |
526 |
|
|
527 |
=head3 DumpMetaData |
=head3 DumpMetaData |
528 |
|
|
529 |
C<< $database->DumpMetaData(); >> |
C<< $erdb->DumpMetaData(); >> |
530 |
|
|
531 |
Return a dump of the metadata structure. |
Return a dump of the metadata structure. |
532 |
|
|
541 |
|
|
542 |
=head3 CreateTables |
=head3 CreateTables |
543 |
|
|
544 |
C<< $datanase->CreateTables(); >> |
C<< $erdb->CreateTables(); >> |
545 |
|
|
546 |
This method creates the tables for the database from the metadata structure loaded by the |
This method creates the tables for the database from the metadata structure loaded by the |
547 |
constructor. It is expected this function will only be used on rare occasions, when the |
constructor. It is expected this function will only be used on rare occasions, when the |
553 |
sub CreateTables { |
sub CreateTables { |
554 |
# Get the parameters. |
# Get the parameters. |
555 |
my ($self) = @_; |
my ($self) = @_; |
556 |
my $metadata = $self->{_metaData}; |
# Get the relation names. |
557 |
my $dbh = $self->{_dbh}; |
my @relNames = $self->GetTableNames(); |
558 |
# Loop through the entities. |
# Loop through the relations. |
559 |
my $entityHash = $metadata->{Entities}; |
for my $relationName (@relNames) { |
|
for my $entityName (keys %{$entityHash}) { |
|
|
my $entityData = $entityHash->{$entityName}; |
|
|
# Tell the user what we're doing. |
|
|
Trace("Creating relations for entity $entityName.") if T(1); |
|
|
# Loop through the entity's relations. |
|
|
for my $relationName (keys %{$entityData->{Relations}}) { |
|
560 |
# Create a table for this relation. |
# Create a table for this relation. |
561 |
$self->CreateTable($relationName); |
$self->CreateTable($relationName); |
562 |
Trace("Relation $relationName created.") if T(1); |
Trace("Relation $relationName created.") if T(2); |
|
} |
|
|
} |
|
|
# Loop through the relationships. |
|
|
my $relationshipTable = $metadata->{Relationships}; |
|
|
for my $relationshipName (keys %{$metadata->{Relationships}}) { |
|
|
# Create a table for this relationship. |
|
|
Trace("Creating relationship $relationshipName.") if T(1); |
|
|
$self->CreateTable($relationshipName); |
|
563 |
} |
} |
564 |
} |
} |
565 |
|
|
566 |
=head3 CreateTable |
=head3 CreateTable |
567 |
|
|
568 |
C<< $database->CreateTable($tableName, $indexFlag); >> |
C<< $erdb->CreateTable($tableName, $indexFlag, $estimatedRows); >> |
569 |
|
|
570 |
Create the table for a relation and optionally create its indexes. |
Create the table for a relation and optionally create its indexes. |
571 |
|
|
575 |
|
|
576 |
Name of the relation (which will also be the table name). |
Name of the relation (which will also be the table name). |
577 |
|
|
578 |
=item $indexFlag |
=item indexFlag |
579 |
|
|
580 |
TRUE if the indexes for the relation should be created, else FALSE. If FALSE, |
TRUE if the indexes for the relation should be created, else FALSE. If FALSE, |
581 |
L</CreateIndexes> must be called later to bring the indexes into existence. |
L</CreateIndexes> must be called later to bring the indexes into existence. |
582 |
|
|
583 |
|
=item estimatedRows (optional) |
584 |
|
|
585 |
|
If specified, the estimated maximum number of rows for the relation. This |
586 |
|
information allows the creation of tables using storage engines that are |
587 |
|
faster but require size estimates, such as MyISAM. |
588 |
|
|
589 |
=back |
=back |
590 |
|
|
591 |
=cut |
=cut |
592 |
|
|
593 |
sub CreateTable { |
sub CreateTable { |
594 |
# Get the parameters. |
# Get the parameters. |
595 |
my ($self, $relationName, $indexFlag) = @_; |
my ($self, $relationName, $indexFlag, $estimatedRows) = @_; |
596 |
# Get the database handle. |
# Get the database handle. |
597 |
my $dbh = $self->{_dbh}; |
my $dbh = $self->{_dbh}; |
598 |
# Get the relation data and determine whether or not the relation is primary. |
# Get the relation data and determine whether or not the relation is primary. |
616 |
# Insure the table is not already there. |
# Insure the table is not already there. |
617 |
$dbh->drop_table(tbl => $relationName); |
$dbh->drop_table(tbl => $relationName); |
618 |
Trace("Table $relationName dropped.") if T(2); |
Trace("Table $relationName dropped.") if T(2); |
619 |
|
# If there are estimated rows, create an estimate so we can take advantage of |
620 |
|
# faster DB technologies. |
621 |
|
my $estimation = undef; |
622 |
|
if ($estimatedRows) { |
623 |
|
$estimation = [$self->EstimateRowSize($relationName), $estimatedRows]; |
624 |
|
} |
625 |
# Create the table. |
# Create the table. |
626 |
Trace("Creating table $relationName: $fieldThing") if T(2); |
Trace("Creating table $relationName: $fieldThing") if T(2); |
627 |
$dbh->create_table(tbl => $relationName, flds => $fieldThing); |
$dbh->create_table(tbl => $relationName, flds => $fieldThing, estimates => $estimation); |
628 |
Trace("Relation $relationName created in database.") if T(2); |
Trace("Relation $relationName created in database.") if T(2); |
629 |
# If we want to build the indexes, we do it here. |
# If we want to build the indexes, we do it here. |
630 |
if ($indexFlag) { |
if ($indexFlag) { |
632 |
} |
} |
633 |
} |
} |
634 |
|
|
635 |
|
=head3 VerifyFields |
636 |
|
|
637 |
|
C<< my $count = $erdb->VerifyFields($relName, \@fieldList); >> |
638 |
|
|
639 |
|
Run through the list of proposed field values, insuring that all the character fields are |
640 |
|
below the maximum length. If any fields are too long, they will be truncated in place. |
641 |
|
|
642 |
|
=over 4 |
643 |
|
|
644 |
|
=item relName |
645 |
|
|
646 |
|
Name of the relation for which the specified fields are destined. |
647 |
|
|
648 |
|
=item fieldList |
649 |
|
|
650 |
|
Reference to a list, in order, of the fields to be put into the relation. |
651 |
|
|
652 |
|
=item RETURN |
653 |
|
|
654 |
|
Returns the number of fields truncated. |
655 |
|
|
656 |
|
=back |
657 |
|
|
658 |
|
=cut |
659 |
|
|
660 |
|
sub VerifyFields { |
661 |
|
# Get the parameters. |
662 |
|
my ($self, $relName, $fieldList) = @_; |
663 |
|
# Initialize the return value. |
664 |
|
my $retVal = 0; |
665 |
|
# Get the relation definition. |
666 |
|
my $relData = $self->_FindRelation($relName); |
667 |
|
# Get the list of field descriptors. |
668 |
|
my $fieldTypes = $relData->{Fields}; |
669 |
|
my $fieldCount = scalar @{$fieldTypes}; |
670 |
|
# Loop through the two lists. |
671 |
|
for (my $i = 0; $i < $fieldCount; $i++) { |
672 |
|
# Get the type of the current field. |
673 |
|
my $fieldType = $fieldTypes->[$i]->{type}; |
674 |
|
# If it's a character field, verify the length. |
675 |
|
if ($fieldType =~ /string/) { |
676 |
|
my $maxLen = $TypeTable{$fieldType}->{maxLen}; |
677 |
|
my $oldString = $fieldList->[$i]; |
678 |
|
if (length($oldString) > $maxLen) { |
679 |
|
# Here it's too big, so we truncate it. |
680 |
|
Trace("Truncating field $i in relation $relName to $maxLen characters from \"$oldString\".") if T(1); |
681 |
|
$fieldList->[$i] = substr $oldString, 0, $maxLen; |
682 |
|
$retVal++; |
683 |
|
} |
684 |
|
} |
685 |
|
} |
686 |
|
# Return the truncation count. |
687 |
|
return $retVal; |
688 |
|
} |
689 |
|
|
690 |
=head3 CreateIndex |
=head3 CreateIndex |
691 |
|
|
692 |
C<< $database->CreateIndex($relationName); >> |
C<< $erdb->CreateIndex($relationName); >> |
693 |
|
|
694 |
Create the indexes for a relation. If a table is being loaded from a large source file (as |
Create the indexes for a relation. If a table is being loaded from a large source file (as |
695 |
is the case in L</LoadTable>), it is best to create the indexes after the load. If that is |
is the case in L</LoadTable>), it is sometimes best to create the indexes after the load. |
696 |
the case, then L</CreateTable> should be called with the index flag set to FALSE, and this |
If that is the case, then L</CreateTable> should be called with the index flag set to |
697 |
method used after the load to create the indexes for the table. |
FALSE, and this method used after the load to create the indexes for the table. |
698 |
|
|
699 |
=cut |
=cut |
700 |
|
|
715 |
# Get the index's uniqueness flag. |
# Get the index's uniqueness flag. |
716 |
my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false'); |
my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false'); |
717 |
# Create the index. |
# Create the index. |
718 |
$dbh->create_index(idx => $indexName, tbl => $relationName, flds => $flds, unique => $unique); |
my $rv = $dbh->create_index(idx => $indexName, tbl => $relationName, |
719 |
|
flds => $flds, unique => $unique); |
720 |
|
if ($rv) { |
721 |
Trace("Index created: $indexName for $relationName ($flds)") if T(1); |
Trace("Index created: $indexName for $relationName ($flds)") if T(1); |
722 |
|
} else { |
723 |
|
Confess("Error creating index $indexName for $relationName using ($flds): " . $dbh->error_message()); |
724 |
|
} |
725 |
} |
} |
726 |
} |
} |
727 |
|
|
728 |
=head3 LoadTables |
=head3 LoadTables |
729 |
|
|
730 |
C<< my $stats = $database->LoadTables($directoryName, $rebuild); >> |
C<< my $stats = $erdb->LoadTables($directoryName, $rebuild); >> |
731 |
|
|
732 |
This method will load the database tables from a directory. The tables must already have been created |
This method will load the database tables from a directory. The tables must already have been created |
733 |
in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name; |
in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name; |
770 |
$directoryName =~ s!/\\$!!; |
$directoryName =~ s!/\\$!!; |
771 |
# Declare the return variable. |
# Declare the return variable. |
772 |
my $retVal = Stats->new(); |
my $retVal = Stats->new(); |
773 |
# Get the metadata structure. |
# Get the relation names. |
774 |
my $metaData = $self->{_metaData}; |
my @relNames = $self->GetTableNames(); |
775 |
# Loop through the entities. |
for my $relationName (@relNames) { |
|
for my $entity (values %{$metaData->{Entities}}) { |
|
|
# Loop through the entity's relations. |
|
|
for my $relationName (keys %{$entity->{Relations}}) { |
|
776 |
# Try to load this relation. |
# Try to load this relation. |
777 |
my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild); |
my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild); |
778 |
# Accumulate the statistics. |
# Accumulate the statistics. |
779 |
$retVal->Accumulate($result); |
$retVal->Accumulate($result); |
780 |
} |
} |
|
} |
|
|
# Loop through the relationships. |
|
|
for my $relationshipName (keys %{$metaData->{Relationships}}) { |
|
|
# Try to load this relationship's relation. |
|
|
my $result = $self->_LoadRelation($directoryName, $relationshipName, $rebuild); |
|
|
# Accumulate the statistics. |
|
|
$retVal->Accumulate($result); |
|
|
} |
|
781 |
# Add the duration of the load to the statistical object. |
# Add the duration of the load to the statistical object. |
782 |
$retVal->Add('duration', gettimeofday - $startTime); |
$retVal->Add('duration', gettimeofday - $startTime); |
783 |
# Return the accumulated statistics. |
# Return the accumulated statistics. |
784 |
return $retVal; |
return $retVal; |
785 |
} |
} |
786 |
|
|
787 |
|
|
788 |
=head3 GetTableNames |
=head3 GetTableNames |
789 |
|
|
790 |
C<< my @names = $database->GetTableNames; >> |
C<< my @names = $erdb->GetTableNames; >> |
791 |
|
|
792 |
Return a list of the relations required to implement this database. |
Return a list of the relations required to implement this database. |
793 |
|
|
804 |
|
|
805 |
=head3 GetEntityTypes |
=head3 GetEntityTypes |
806 |
|
|
807 |
C<< my @names = $database->GetEntityTypes; >> |
C<< my @names = $erdb->GetEntityTypes; >> |
808 |
|
|
809 |
Return a list of the entity type names. |
Return a list of the entity type names. |
810 |
|
|
819 |
return sort keys %{$entityList}; |
return sort keys %{$entityList}; |
820 |
} |
} |
821 |
|
|
822 |
|
=head3 IsEntity |
823 |
|
|
824 |
|
C<< my $flag = $erdb->IsEntity($entityName); >> |
825 |
|
|
826 |
|
Return TRUE if the parameter is an entity name, else FALSE. |
827 |
|
|
828 |
|
=over 4 |
829 |
|
|
830 |
|
=item entityName |
831 |
|
|
832 |
|
Object name to be tested. |
833 |
|
|
834 |
|
=item RETURN |
835 |
|
|
836 |
|
Returns TRUE if the specified string is an entity name, else FALSE. |
837 |
|
|
838 |
|
=back |
839 |
|
|
840 |
|
=cut |
841 |
|
|
842 |
|
sub IsEntity { |
843 |
|
# Get the parameters. |
844 |
|
my ($self, $entityName) = @_; |
845 |
|
# Test to see if it's an entity. |
846 |
|
return exists $self->{_metaData}->{Entities}->{$entityName}; |
847 |
|
} |
848 |
|
|
849 |
=head3 Get |
=head3 Get |
850 |
|
|
851 |
C<< my $query = $database->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >> |
C<< my $query = $erdb->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >> |
852 |
|
|
853 |
This method returns a query object for entities of a specified type using a specified filter. |
This method returns a query object for entities of a specified type using a specified filter. |
854 |
The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each |
The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each |
856 |
following call requests all B<Genome> objects for the genus specified in the variable |
following call requests all B<Genome> objects for the genus specified in the variable |
857 |
$genus. |
$genus. |
858 |
|
|
859 |
C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", $genus); >> |
C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", $genus); >> |
860 |
|
|
861 |
The WHERE clause contains a single question mark, so there is a single additional |
The WHERE clause contains a single question mark, so there is a single additional |
862 |
parameter representing the parameter value. It would also be possible to code |
parameter representing the parameter value. It would also be possible to code |
863 |
|
|
864 |
C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >> |
C<< $query = $erdb->Get(['Genome'], "Genome(genus) = \'$genus\'"); >> |
865 |
|
|
866 |
however, this version of the call would generate a syntax error if there were any quote |
however, this version of the call would generate a syntax error if there were any quote |
867 |
characters inside the variable C<$genus>. |
characters inside the variable C<$genus>. |
873 |
It is possible to specify multiple entity and relationship names in order to retrieve more than |
It is possible to specify multiple entity and relationship names in order to retrieve more than |
874 |
one object's data at the same time, which allows highly complex joined queries. For example, |
one object's data at the same time, which allows highly complex joined queries. For example, |
875 |
|
|
876 |
C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >> |
C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >> |
877 |
|
|
878 |
If multiple names are specified, then the query processor will automatically determine a |
If multiple names are specified, then the query processor will automatically determine a |
879 |
join path between the entities and relationships. The algorithm used is very simplistic. |
join path between the entities and relationships. The algorithm used is very simplistic. |
880 |
In particular, you can't specify any entity or relationship more than once, and if a |
In particular, if a relationship is recursive, the path is determined by the order in which |
881 |
relationship is recursive, the path is determined by the order in which the entity |
the entity and the relationship appear. For example, consider a recursive relationship |
882 |
and the relationship appear. For example, consider a recursive relationship B<IsParentOf> |
B<IsParentOf> which relates B<People> objects to other B<People> objects. If the join path is |
|
which relates B<People> objects to other B<People> objects. If the join path is |
|
883 |
coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however, |
coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however, |
884 |
the join path is C<['IsParentOf', 'People']>, then the people returned will be children. |
the join path is C<['IsParentOf', 'People']>, then the people returned will be children. |
885 |
|
|
886 |
|
If an entity or relationship is mentioned twice, the name for the second occurrence will |
887 |
|
be suffixed with C<2>, the third occurrence will be suffixed with C<3>, and so forth. So, |
888 |
|
for example, if we have C<['Feature', 'HasContig', 'Contig', 'HasContig']>, then the |
889 |
|
B<to-link> field of the first B<HasContig> is specified as C<HasContig(to-link)>, while |
890 |
|
the B<to-link> field of the second B<HasContig> is specified as C<HasContig2(to-link)>. |
891 |
|
|
892 |
=over 4 |
=over 4 |
893 |
|
|
894 |
=item objectNames |
=item objectNames |
911 |
|
|
912 |
C<< "Genome(genus) = ? ORDER BY Genome(species)" >> |
C<< "Genome(genus) = ? ORDER BY Genome(species)" >> |
913 |
|
|
914 |
|
Note that the case is important. Only an uppercase "ORDER BY" with a single space will |
915 |
|
be processed. The idea is to make it less likely to find the verb by accident. |
916 |
|
|
917 |
The rules for field references in a sort order are the same as those for field references in the |
The rules for field references in a sort order are the same as those for field references in the |
918 |
filter clause in general; however, odd things may happen if a sort field is from a secondary |
filter clause in general; however, odd things may happen if a sort field is from a secondary |
919 |
relation. |
relation. |
920 |
|
|
921 |
|
Finally, you can limit the number of rows returned by adding a LIMIT clause. The LIMIT must |
922 |
|
be the last thing in the filter clause, and it contains only the word "LIMIT" followed by |
923 |
|
a positive number. So, for example |
924 |
|
|
925 |
|
C<< "Genome(genus) = ? ORDER BY Genome(species) LIMIT 10" >> |
926 |
|
|
927 |
|
will only return the first ten genomes for the specified genus. The ORDER BY clause is not |
928 |
|
required. For example, to just get the first 10 genomes in the B<Genome> table, you could |
929 |
|
use |
930 |
|
|
931 |
|
C<< "LIMIT 10" >> |
932 |
|
|
933 |
=item param1, param2, ..., paramN |
=item param1, param2, ..., paramN |
934 |
|
|
935 |
Parameter values to be substituted into the filter clause. |
Parameter values to be substituted into the filter clause. |
945 |
sub Get { |
sub Get { |
946 |
# Get the parameters. |
# Get the parameters. |
947 |
my ($self, $objectNames, $filterClause, @params) = @_; |
my ($self, $objectNames, $filterClause, @params) = @_; |
948 |
|
# Adjust the list of object names to account for multiple occurrences of the |
949 |
|
# same object. We start with a hash table keyed on object name that will |
950 |
|
# return the object suffix. The first time an object is encountered it will |
951 |
|
# not be found in the hash. The next time the hash will map the object name |
952 |
|
# to 2, then 3, and so forth. |
953 |
|
my %objectHash = (); |
954 |
|
# This list will contain the object names as they are to appear in the |
955 |
|
# FROM list. |
956 |
|
my @fromList = (); |
957 |
|
# This list contains the suffixed object name for each object. It is exactly |
958 |
|
# parallel to the list in the $objectNames parameter. |
959 |
|
my @mappedNameList = (); |
960 |
|
# Finally, this hash translates from a mapped name to its original object name. |
961 |
|
my %mappedNameHash = (); |
962 |
|
# Now we create the lists. Note that for every single name we push something into |
963 |
|
# @fromList and @mappedNameList. This insures that those two arrays are exactly |
964 |
|
# parallel to $objectNames. |
965 |
|
for my $objectName (@{$objectNames}) { |
966 |
|
# Get the next suffix for this object. |
967 |
|
my $suffix = $objectHash{$objectName}; |
968 |
|
if (! $suffix) { |
969 |
|
# Here we are seeing the object for the first time. The object name |
970 |
|
# is used as is. |
971 |
|
push @mappedNameList, $objectName; |
972 |
|
push @fromList, $objectName; |
973 |
|
$mappedNameHash{$objectName} = $objectName; |
974 |
|
# Denote the next suffix will be 2. |
975 |
|
$objectHash{$objectName} = 2; |
976 |
|
} else { |
977 |
|
# Here we've seen the object before. We construct a new name using |
978 |
|
# the suffix from the hash and update the hash. |
979 |
|
my $mappedName = "$objectName$suffix"; |
980 |
|
$objectHash{$objectName} = $suffix + 1; |
981 |
|
# The FROM list has the object name followed by the mapped name. This |
982 |
|
# tells SQL it's still the same table, but we're using a different name |
983 |
|
# for it to avoid confusion. |
984 |
|
push @fromList, "$objectName $mappedName"; |
985 |
|
# The mapped-name list contains the real mapped name. |
986 |
|
push @mappedNameList, $mappedName; |
987 |
|
# Finally, enable us to get back from the mapped name to the object name. |
988 |
|
$mappedNameHash{$mappedName} = $objectName; |
989 |
|
} |
990 |
|
} |
991 |
# Construct the SELECT statement. The general pattern is |
# Construct the SELECT statement. The general pattern is |
992 |
# |
# |
993 |
# SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN |
# SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN |
994 |
# |
# |
995 |
my $dbh = $self->{_dbh}; |
my $dbh = $self->{_dbh}; |
996 |
my $command = "SELECT DISTINCT " . join('.*, ', @{$objectNames}) . ".* FROM " . |
my $command = "SELECT DISTINCT " . join('.*, ', @mappedNameList) . ".* FROM " . |
997 |
join(', ', @{$objectNames}); |
join(', ', @fromList); |
998 |
# Check for a filter clause. |
# Check for a filter clause. |
999 |
if ($filterClause) { |
if ($filterClause) { |
1000 |
# Here we have one, so we convert its field names and add it to the query. First, |
# Here we have one, so we convert its field names and add it to the query. First, |
1002 |
my $filterString = $filterClause; |
my $filterString = $filterClause; |
1003 |
# Next, we sort the object names by length. This helps protect us from finding |
# Next, we sort the object names by length. This helps protect us from finding |
1004 |
# object names inside other object names when we're doing our search and replace. |
# object names inside other object names when we're doing our search and replace. |
1005 |
my @sortedNames = sort { length($b) - length($a) } @{$objectNames}; |
my @sortedNames = sort { length($b) - length($a) } @mappedNameList; |
1006 |
# We will also keep a list of conditions to add to the WHERE clause in order to link |
# We will also keep a list of conditions to add to the WHERE clause in order to link |
1007 |
# entities and relationships as well as primary relations to secondary ones. |
# entities and relationships as well as primary relations to secondary ones. |
1008 |
my @joinWhere = (); |
my @joinWhere = (); |
1009 |
# The final preparatory step is to create a hash table of relation names. The |
# The final preparatory step is to create a hash table of relation names. The |
1010 |
# table begins with the relation names already in the SELECT command. |
# table begins with the relation names already in the SELECT command. We may |
1011 |
my %fromNames = (); |
# need to add relations later if there is filtering on a field in a secondary |
1012 |
for my $objectName (@sortedNames) { |
# relation. The secondary relations are the ones that contain multiply- |
1013 |
$fromNames{$objectName} = 1; |
# occurring or optional fields. |
1014 |
} |
my %fromNames = map { $_ => 1 } @sortedNames; |
1015 |
# We are ready to begin. We loop through the object names, replacing each |
# We are ready to begin. We loop through the object names, replacing each |
1016 |
# object name's field references by the corresponding SQL field reference. |
# object name's field references by the corresponding SQL field reference. |
1017 |
# Along the way, if we find a secondary relation, we will need to add it |
# Along the way, if we find a secondary relation, we will need to add it |
1018 |
# to the FROM clause. |
# to the FROM clause. |
1019 |
for my $objectName (@sortedNames) { |
for my $mappedName (@sortedNames) { |
1020 |
# Get the length of the object name plus 2. This is the value we add to the |
# Get the length of the object name plus 2. This is the value we add to the |
1021 |
# size of the field name to determine the size of the field reference as a |
# size of the field name to determine the size of the field reference as a |
1022 |
# whole. |
# whole. |
1023 |
my $nameLength = 2 + length $objectName; |
my $nameLength = 2 + length $mappedName; |
1024 |
|
# Get the real object name for this mapped name. |
1025 |
|
my $objectName = $mappedNameHash{$mappedName}; |
1026 |
|
Trace("Processing $mappedName for object $objectName.") if T(4); |
1027 |
# Get the object's field list. |
# Get the object's field list. |
1028 |
my $fieldList = $self->_GetFieldTable($objectName); |
my $fieldList = $self->GetFieldTable($objectName); |
1029 |
# Find the field references for this object. |
# Find the field references for this object. |
1030 |
while ($filterString =~ m/$objectName\(([^)]*)\)/g) { |
while ($filterString =~ m/$mappedName\(([^)]*)\)/g) { |
1031 |
# At this point, $1 contains the field name, and the current position |
# At this point, $1 contains the field name, and the current position |
1032 |
# is set immediately after the final parenthesis. We pull out the name of |
# is set immediately after the final parenthesis. We pull out the name of |
1033 |
# the field and the position and length of the field reference as a whole. |
# the field and the position and length of the field reference as a whole. |
1038 |
if (!exists $fieldList->{$fieldName}) { |
if (!exists $fieldList->{$fieldName}) { |
1039 |
Confess("Field $fieldName not found for object $objectName."); |
Confess("Field $fieldName not found for object $objectName."); |
1040 |
} else { |
} else { |
1041 |
|
Trace("Processing $fieldName at position $pos.") if T(4); |
1042 |
# Get the field's relation. |
# Get the field's relation. |
1043 |
my $relationName = $fieldList->{$fieldName}->{relation}; |
my $relationName = $fieldList->{$fieldName}->{relation}; |
1044 |
|
# Now we have a secondary relation. We need to insure it matches the |
1045 |
|
# mapped name of the primary relation. First we peel off the suffix |
1046 |
|
# from the mapped name. |
1047 |
|
my $mappingSuffix = substr $mappedName, length($objectName); |
1048 |
|
# Put the mapping suffix onto the relation name to get the |
1049 |
|
# mapped relation name. |
1050 |
|
my $mappedRelationName = "$relationName$mappingSuffix"; |
1051 |
# Insure the relation is in the FROM clause. |
# Insure the relation is in the FROM clause. |
1052 |
if (!exists $fromNames{$relationName}) { |
if (!exists $fromNames{$mappedRelationName}) { |
1053 |
# Add the relation to the FROM clause. |
# Add the relation to the FROM clause. |
1054 |
|
if ($mappedRelationName eq $relationName) { |
1055 |
|
# The name is un-mapped, so we add it without |
1056 |
|
# any frills. |
1057 |
$command .= ", $relationName"; |
$command .= ", $relationName"; |
|
# Create its join sub-clause. |
|
1058 |
push @joinWhere, "$objectName.id = $relationName.id"; |
push @joinWhere, "$objectName.id = $relationName.id"; |
1059 |
# Denote we have it available for future fields. |
} else { |
1060 |
$fromNames{$relationName} = 1; |
# Here we have a mapping situation. |
1061 |
|
$command .= ", $relationName $mappedRelationName"; |
1062 |
|
push @joinWhere, "$mappedRelationName.id = $mappedName.id"; |
1063 |
|
} |
1064 |
|
# Denote we have this relation available for future fields. |
1065 |
|
$fromNames{$mappedRelationName} = 1; |
1066 |
} |
} |
1067 |
# Form an SQL field reference from the relation name and the field name. |
# Form an SQL field reference from the relation name and the field name. |
1068 |
my $sqlReference = "$relationName." . _FixName($fieldName); |
my $sqlReference = "$mappedRelationName." . _FixName($fieldName); |
1069 |
# Put it into the filter string in place of the old value. |
# Put it into the filter string in place of the old value. |
1070 |
substr($filterString, $pos, $len) = $sqlReference; |
substr($filterString, $pos, $len) = $sqlReference; |
1071 |
# Reposition the search. |
# Reposition the search. |
1072 |
pos $filterString = $pos + length $sqlReference; |
pos $filterString = $pos + length $sqlReference; |
1073 |
|
Trace("New filter string is \"$filterString\".") if T(4); |
1074 |
} |
} |
1075 |
} |
} |
1076 |
} |
} |
1078 |
# is more than one object in the object list. We start with the first object and |
# is more than one object in the object list. We start with the first object and |
1079 |
# run through the objects after it. Note also that we make a safety copy of the |
# run through the objects after it. Note also that we make a safety copy of the |
1080 |
# list before running through it. |
# list before running through it. |
1081 |
my @objectList = @{$objectNames}; |
my @mappedObjectList = @mappedNameList; |
1082 |
my $lastObject = shift @objectList; |
my $lastMappedObject = shift @mappedObjectList; |
1083 |
# Get the join table. |
# Get the join table. |
1084 |
my $joinTable = $self->{_metaData}->{Joins}; |
my $joinTable = $self->{_metaData}->{Joins}; |
1085 |
# Loop through the object list. |
# Loop through the object list. |
1086 |
for my $thisObject (@objectList) { |
for my $thisMappedObject (@mappedObjectList) { |
1087 |
# Look for a join. |
# Look for a join using the real object names. |
1088 |
|
my $lastObject = $mappedNameHash{$lastMappedObject}; |
1089 |
|
my $thisObject = $mappedNameHash{$thisMappedObject}; |
1090 |
my $joinKey = "$lastObject/$thisObject"; |
my $joinKey = "$lastObject/$thisObject"; |
1091 |
if (!exists $joinTable->{$joinKey}) { |
if (!exists $joinTable->{$joinKey}) { |
1092 |
# Here there's no join, so we throw an error. |
# Here there's no join, so we throw an error. |
1093 |
Confess("No join exists to connect from $lastObject to $thisObject."); |
Confess("No join exists to connect from $lastMappedObject to $thisMappedObject."); |
1094 |
} else { |
} else { |
1095 |
# Get the join clause and add it to the WHERE list. |
# Get the join clause. |
1096 |
push @joinWhere, $joinTable->{$joinKey}; |
my $unMappedJoin = $joinTable->{$joinKey}; |
1097 |
|
# Fix the names. |
1098 |
|
$unMappedJoin =~ s/$lastObject/$lastMappedObject/; |
1099 |
|
$unMappedJoin =~ s/$thisObject/$thisMappedObject/; |
1100 |
|
push @joinWhere, $unMappedJoin; |
1101 |
# Save this object as the last object for the next iteration. |
# Save this object as the last object for the next iteration. |
1102 |
$lastObject = $thisObject; |
$lastMappedObject = $thisMappedObject; |
1103 |
} |
} |
1104 |
} |
} |
1105 |
# Now we need to handle the whole ORDER BY thing. We'll put the order by clause |
# Now we need to handle the whole ORDER BY / LIMIT thing. The important part |
1106 |
# in the following variable. |
# here is we want the filter clause to be empty if there's no WHERE filter. |
1107 |
|
# We'll put the ORDER BY / LIMIT clauses in the following variable. |
1108 |
my $orderClause = ""; |
my $orderClause = ""; |
1109 |
# Locate the ORDER BY verb (if any). |
# Locate the ORDER BY or LIMIT verbs (if any). We use a non-greedy |
1110 |
if ($filterString =~ m/^(.*)ORDER BY/g) { |
# operator so that we find the first occurrence of either verb. |
1111 |
# Here we have an ORDER BY verb. Split it off of the filter string. |
if ($filterString =~ m/^(.*?)\s*(ORDER BY|LIMIT)/g) { |
1112 |
|
# Here we have an ORDER BY or LIMIT verb. Split it off of the filter string. |
1113 |
my $pos = pos $filterString; |
my $pos = pos $filterString; |
1114 |
$orderClause = substr($filterString, $pos); |
$orderClause = $2 . substr($filterString, $pos); |
1115 |
$filterString = $1; |
$filterString = $1; |
1116 |
} |
} |
1117 |
# Add the filter and the join clauses (if any) to the SELECT command. |
# Add the filter and the join clauses (if any) to the SELECT command. |
1121 |
if (@joinWhere) { |
if (@joinWhere) { |
1122 |
$command .= " WHERE " . join(' AND ', @joinWhere); |
$command .= " WHERE " . join(' AND ', @joinWhere); |
1123 |
} |
} |
1124 |
# Add the sort clause (if any) to the SELECT command. |
# Add the sort or limit clause (if any) to the SELECT command. |
1125 |
if ($orderClause) { |
if ($orderClause) { |
1126 |
$command .= " ORDER BY $orderClause"; |
$command .= " $orderClause"; |
1127 |
} |
} |
1128 |
} |
} |
1129 |
Trace("SQL query: $command") if T(2); |
Trace("SQL query: $command") if T(SQL => 4); |
1130 |
Trace("PARMS: '" . (join "', '", @params) . "'") if (T(3) && (@params > 0)); |
Trace("PARMS: '" . (join "', '", @params) . "'") if (T(SQL => 4) && (@params > 0)); |
1131 |
my $sth = $dbh->prepare_command($command); |
my $sth = $dbh->prepare_command($command); |
1132 |
# Execute it with the parameters bound in. |
# Execute it with the parameters bound in. |
1133 |
$sth->execute(@params) || Confess("SELECT error" . $sth->errstr()); |
$sth->execute(@params) || Confess("SELECT error" . $sth->errstr()); |
1134 |
|
# Now we create the relation map, which enables DBQuery to determine the order, name |
1135 |
|
# and mapped name for each object in the query. |
1136 |
|
my @relationMap = (); |
1137 |
|
for my $mappedName (@mappedNameList) { |
1138 |
|
push @relationMap, [$mappedName, $mappedNameHash{$mappedName}]; |
1139 |
|
} |
1140 |
# Return the statement object. |
# Return the statement object. |
1141 |
my $retVal = DBQuery::_new($self, $sth, @{$objectNames}); |
my $retVal = DBQuery::_new($self, $sth, \@relationMap); |
1142 |
|
return $retVal; |
1143 |
|
} |
1144 |
|
|
1145 |
|
=head3 Delete |
1146 |
|
|
1147 |
|
C<< my $stats = $erdb->Delete($entityName, $objectID); >> |
1148 |
|
|
1149 |
|
Delete an entity instance from the database. The instance is deleted along with all entity and |
1150 |
|
relationship instances dependent on it. The idea of dependence here is recursive. An object is |
1151 |
|
always dependent on itself. An object is dependent if it is a 1-to-many or many-to-many |
1152 |
|
relationship connected to a dependent entity or the "to" entity connected to a 1-to-many |
1153 |
|
dependent relationship. |
1154 |
|
|
1155 |
|
=over 4 |
1156 |
|
|
1157 |
|
=item entityName |
1158 |
|
|
1159 |
|
Name of the entity type for the instance being deleted. |
1160 |
|
|
1161 |
|
=item objectID |
1162 |
|
|
1163 |
|
ID of the entity instance to be deleted. If the ID contains a wild card character (C<%>), |
1164 |
|
then it is presumed to by a LIKE pattern. |
1165 |
|
|
1166 |
|
=item testFlag |
1167 |
|
|
1168 |
|
If TRUE, the delete statements will be traced without being executed. |
1169 |
|
|
1170 |
|
=item RETURN |
1171 |
|
|
1172 |
|
Returns a statistics object indicating how many records of each particular table were |
1173 |
|
deleted. |
1174 |
|
|
1175 |
|
=back |
1176 |
|
|
1177 |
|
=cut |
1178 |
|
#: Return Type $%; |
1179 |
|
sub Delete { |
1180 |
|
# Get the parameters. |
1181 |
|
my ($self, $entityName, $objectID, $testFlag) = @_; |
1182 |
|
# Declare the return variable. |
1183 |
|
my $retVal = Stats->new(); |
1184 |
|
# Get the DBKernel object. |
1185 |
|
my $db = $self->{_dbh}; |
1186 |
|
# We're going to generate all the paths branching out from the starting entity. One of |
1187 |
|
# the things we have to be careful about is preventing loops. We'll use a hash to |
1188 |
|
# determine if we've hit a loop. |
1189 |
|
my %alreadyFound = (); |
1190 |
|
# These next lists will serve as our result stack. We start by pushing object lists onto |
1191 |
|
# the stack, and then popping them off to do the deletes. This means the deletes will |
1192 |
|
# start with the longer paths before getting to the shorter ones. That, in turn, makes |
1193 |
|
# sure we don't delete records that might be needed to forge relationships back to the |
1194 |
|
# original item. We have two lists-- one for TO-relationships, and one for |
1195 |
|
# FROM-relationships and entities. |
1196 |
|
my @fromPathList = (); |
1197 |
|
my @toPathList = (); |
1198 |
|
# This final hash is used to remember what work still needs to be done. We push paths |
1199 |
|
# onto the list, then pop them off to extend the paths. We prime it with the starting |
1200 |
|
# point. Note that we will work hard to insure that the last item on a path in the |
1201 |
|
# TODO list is always an entity. |
1202 |
|
my @todoList = ([$entityName]); |
1203 |
|
while (@todoList) { |
1204 |
|
# Get the current path. |
1205 |
|
my $current = pop @todoList; |
1206 |
|
# Copy it into a list. |
1207 |
|
my @stackedPath = @{$current}; |
1208 |
|
# Pull off the last item on the path. It will always be an entity. |
1209 |
|
my $entityName = pop @stackedPath; |
1210 |
|
# Add it to the alreadyFound list. |
1211 |
|
$alreadyFound{$entityName} = 1; |
1212 |
|
# Get the entity data. |
1213 |
|
my $entityData = $self->_GetStructure($entityName); |
1214 |
|
# The first task is to loop through the entity's relation. A DELETE command will |
1215 |
|
# be needed for each of them. |
1216 |
|
my $relations = $entityData->{Relations}; |
1217 |
|
for my $relation (keys %{$relations}) { |
1218 |
|
my @augmentedList = (@stackedPath, $relation); |
1219 |
|
push @fromPathList, \@augmentedList; |
1220 |
|
} |
1221 |
|
# Now we need to look for relationships connected to this entity. |
1222 |
|
my $relationshipList = $self->{_metaData}->{Relationships}; |
1223 |
|
for my $relationshipName (keys %{$relationshipList}) { |
1224 |
|
my $relationship = $relationshipList->{$relationshipName}; |
1225 |
|
# Check the FROM field. We're only interested if it's us. |
1226 |
|
if ($relationship->{from} eq $entityName) { |
1227 |
|
# Add the path to this relationship. |
1228 |
|
my @augmentedList = (@stackedPath, $entityName, $relationshipName); |
1229 |
|
push @fromPathList, \@augmentedList; |
1230 |
|
# Check the arity. If it's MM we're done. If it's 1M |
1231 |
|
# and the target hasn't been seen yet, we want to |
1232 |
|
# stack the entity for future processing. |
1233 |
|
if ($relationship->{arity} eq '1M') { |
1234 |
|
my $toEntity = $relationship->{to}; |
1235 |
|
if (! exists $alreadyFound{$toEntity}) { |
1236 |
|
# Here we have a new entity that's dependent on |
1237 |
|
# the current entity, so we need to stack it. |
1238 |
|
my @stackList = (@augmentedList, $toEntity); |
1239 |
|
push @fromPathList, \@stackList; |
1240 |
|
} else { |
1241 |
|
Trace("$toEntity ignored because it occurred previously.") if T(4); |
1242 |
|
} |
1243 |
|
} |
1244 |
|
} |
1245 |
|
# Now check the TO field. In this case only the relationship needs |
1246 |
|
# deletion. |
1247 |
|
if ($relationship->{to} eq $entityName) { |
1248 |
|
my @augmentedList = (@stackedPath, $entityName, $relationshipName); |
1249 |
|
push @toPathList, \@augmentedList; |
1250 |
|
} |
1251 |
|
} |
1252 |
|
} |
1253 |
|
# Create the first qualifier for the WHERE clause. This selects the |
1254 |
|
# keys of the primary entity records to be deleted. When we're deleting |
1255 |
|
# from a dependent table, we construct a join page from the first qualifier |
1256 |
|
# to the table containing the dependent records to delete. |
1257 |
|
my $qualifier = ($objectID =~ /%/ ? "LIKE ?" : "= ?"); |
1258 |
|
# We need to make two passes. The first is through the to-list, and |
1259 |
|
# the second through the from-list. The from-list is second because |
1260 |
|
# the to-list may need to pass through some of the entities the |
1261 |
|
# from-list would delete. |
1262 |
|
my %stackList = ( from_link => \@fromPathList, to_link => \@toPathList ); |
1263 |
|
# Now it's time to do the deletes. We do it in two passes. |
1264 |
|
for my $keyName ('to_link', 'from_link') { |
1265 |
|
# Get the list for this key. |
1266 |
|
my @pathList = @{$stackList{$keyName}}; |
1267 |
|
Trace(scalar(@pathList) . " entries in path list for $keyName.") if T(3); |
1268 |
|
# Loop through this list. |
1269 |
|
while (my $path = pop @pathList) { |
1270 |
|
# Get the table whose rows are to be deleted. |
1271 |
|
my @pathTables = @{$path}; |
1272 |
|
# Start the DELETE statement. We need to call DBKernel because the |
1273 |
|
# syntax of a DELETE-USING varies among DBMSs. |
1274 |
|
my $target = $pathTables[$#pathTables]; |
1275 |
|
my $stmt = $db->SetUsing(@pathTables); |
1276 |
|
# Now start the WHERE. The first thing is the ID field from the starting table. That |
1277 |
|
# starting table will either be the entity relation or one of the entity's |
1278 |
|
# sub-relations. |
1279 |
|
$stmt .= " WHERE $pathTables[0].id $qualifier"; |
1280 |
|
# Now we run through the remaining entities in the path, connecting them up. |
1281 |
|
for (my $i = 1; $i <= $#pathTables; $i += 2) { |
1282 |
|
# Connect the current relationship to the preceding entity. |
1283 |
|
my ($entity, $rel) = @pathTables[$i-1,$i]; |
1284 |
|
# The style of connection depends on the direction of the relationship. |
1285 |
|
$stmt .= " AND $entity.id = $rel.$keyName"; |
1286 |
|
if ($i + 1 <= $#pathTables) { |
1287 |
|
# Here there's a next entity, so connect that to the relationship's |
1288 |
|
# to-link. |
1289 |
|
my $entity2 = $pathTables[$i+1]; |
1290 |
|
$stmt .= " AND $rel.to_link = $entity2.id"; |
1291 |
|
} |
1292 |
|
} |
1293 |
|
# Now we have our desired DELETE statement. |
1294 |
|
if ($testFlag) { |
1295 |
|
# Here the user wants to trace without executing. |
1296 |
|
Trace($stmt) if T(0); |
1297 |
|
} else { |
1298 |
|
# Here we can delete. Note that the SQL method dies with a confessing |
1299 |
|
# if an error occurs, so we just go ahead and do it. |
1300 |
|
Trace("Executing delete from $target using '$objectID'.") if T(3); |
1301 |
|
my $rv = $db->SQL($stmt, 0, $objectID); |
1302 |
|
# Accumulate the statistics for this delete. The only rows deleted |
1303 |
|
# are from the target table, so we use its name to record the |
1304 |
|
# statistic. |
1305 |
|
$retVal->Add($target, $rv); |
1306 |
|
} |
1307 |
|
} |
1308 |
|
} |
1309 |
|
# Return the result. |
1310 |
return $retVal; |
return $retVal; |
1311 |
} |
} |
1312 |
|
|
1313 |
=head3 GetList |
=head3 GetList |
1314 |
|
|
1315 |
C<< my @dbObjects = $database->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >> |
C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >> |
1316 |
|
|
1317 |
Return a list of object descriptors for the specified objects as determined by the |
Return a list of object descriptors for the specified objects as determined by the |
1318 |
specified filter clause. |
specified filter clause. |
1319 |
|
|
1320 |
This method is essentially the same as L</Get> except it returns a list of objects rather |
This method is essentially the same as L</Get> except it returns a list of objects rather |
1321 |
that a query object that can be used to get the results one record at a time. |
than a query object that can be used to get the results one record at a time. |
|
|
|
|
=over 4 |
|
1322 |
|
|
1323 |
=over 4 |
=over 4 |
1324 |
|
|
1375 |
|
|
1376 |
=head3 ComputeObjectSentence |
=head3 ComputeObjectSentence |
1377 |
|
|
1378 |
C<< my $sentence = $database->ComputeObjectSentence($objectName); >> |
C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >> |
1379 |
|
|
1380 |
Check an object name, and if it is a relationship convert it to a relationship sentence. |
Check an object name, and if it is a relationship convert it to a relationship sentence. |
1381 |
|
|
1410 |
|
|
1411 |
=head3 DumpRelations |
=head3 DumpRelations |
1412 |
|
|
1413 |
C<< $database->DumpRelations($outputDirectory); >> |
C<< $erdb->DumpRelations($outputDirectory); >> |
1414 |
|
|
1415 |
Write the contents of all the relations to tab-delimited files in the specified directory. |
Write the contents of all the relations to tab-delimited files in the specified directory. |
1416 |
Each file will have the same name as the relation dumped, with an extension of DTX. |
Each file will have the same name as the relation dumped, with an extension of DTX. |
1452 |
|
|
1453 |
=head3 InsertObject |
=head3 InsertObject |
1454 |
|
|
1455 |
C<< my $ok = $database->InsertObject($objectType, \%fieldHash); >> |
C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >> |
1456 |
|
|
1457 |
Insert an object into the database. The object is defined by a type name and then a hash |
Insert an object into the database. The object is defined by a type name and then a hash |
1458 |
of field names to values. Field values in the primary relation are represented by scalars. |
of field names to values. Field values in the primary relation are represented by scalars. |
1461 |
example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases |
example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases |
1462 |
C<ZP_00210270.1> and C<gi|46206278>. |
C<ZP_00210270.1> and C<gi|46206278>. |
1463 |
|
|
1464 |
C<< $database->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >> |
C<< $erdb->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >> |
1465 |
|
|
1466 |
The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and |
The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and |
1467 |
property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>. |
property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>. |
1468 |
|
|
1469 |
C<< $database->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >> |
C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >> |
1470 |
|
|
1471 |
=over 4 |
=over 4 |
1472 |
|
|
1591 |
|
|
1592 |
=head3 LoadTable |
=head3 LoadTable |
1593 |
|
|
1594 |
C<< my %results = $database->LoadTable($fileName, $relationName, $truncateFlag); >> |
C<< my %results = $erdb->LoadTable($fileName, $relationName, $truncateFlag); >> |
1595 |
|
|
1596 |
Load data from a tab-delimited file into a specified table, optionally re-creating the table first. |
Load data from a tab-delimited file into a specified table, optionally re-creating the table |
1597 |
|
first. |
1598 |
|
|
1599 |
=over 4 |
=over 4 |
1600 |
|
|
1612 |
|
|
1613 |
=item RETURN |
=item RETURN |
1614 |
|
|
1615 |
Returns a statistical object containing the number of records read and a list of the error messages. |
Returns a statistical object containing a list of the error messages. |
1616 |
|
|
1617 |
=back |
=back |
1618 |
|
|
1623 |
# Create the statistical return object. |
# Create the statistical return object. |
1624 |
my $retVal = _GetLoadStats(); |
my $retVal = _GetLoadStats(); |
1625 |
# Trace the fact of the load. |
# Trace the fact of the load. |
1626 |
Trace("Loading table $relationName from $fileName") if T(1); |
Trace("Loading table $relationName from $fileName") if T(2); |
1627 |
# Get the database handle. |
# Get the database handle. |
1628 |
my $dbh = $self->{_dbh}; |
my $dbh = $self->{_dbh}; |
1629 |
|
# Get the input file size. |
1630 |
|
my $fileSize = -s $fileName; |
1631 |
# Get the relation data. |
# Get the relation data. |
1632 |
my $relation = $self->_FindRelation($relationName); |
my $relation = $self->_FindRelation($relationName); |
1633 |
# Check the truncation flag. |
# Check the truncation flag. |
1634 |
if ($truncateFlag) { |
if ($truncateFlag) { |
1635 |
Trace("Creating table $relationName") if T(1); |
Trace("Creating table $relationName") if T(2); |
1636 |
|
# Compute the row count estimate. We take the size of the load file, |
1637 |
|
# divide it by the estimated row size, and then multiply by 1.5 to |
1638 |
|
# leave extra room. We postulate a minimum row count of 1000 to |
1639 |
|
# prevent problems with incoming empty load files. |
1640 |
|
my $rowSize = $self->EstimateRowSize($relationName); |
1641 |
|
my $estimate = FIG::max($fileSize * 1.5 / $rowSize, 1000); |
1642 |
# Re-create the table without its index. |
# Re-create the table without its index. |
1643 |
$self->CreateTable($relationName, 0); |
$self->CreateTable($relationName, 0, $estimate); |
1644 |
|
# If this is a pre-index DBMS, create the index here. |
1645 |
|
if ($dbh->{_preIndex}) { |
1646 |
|
eval { |
1647 |
|
$self->CreateIndex($relationName); |
1648 |
|
}; |
1649 |
|
if ($@) { |
1650 |
|
$retVal->AddMessage($@); |
1651 |
|
} |
1652 |
|
} |
1653 |
} |
} |
|
# Determine whether or not this is a primary relation. Primary relations have an extra |
|
|
# field indicating whether or not a given object is new or was loaded from the flat files. |
|
|
my $primary = $self->_IsPrimary($relationName); |
|
|
# Get the number of fields in this relation. |
|
|
my @fieldList = @{$relation->{Fields}}; |
|
|
my $fieldCount = @fieldList; |
|
|
# Record the number of expected fields. |
|
|
my $expectedFields = $fieldCount + ($primary ? 1 : 0); |
|
|
# Start a database transaction. |
|
|
$dbh->begin_tran; |
|
|
# Open the relation file. We need to create a cleaned-up copy before loading. |
|
|
open TABLEIN, '<', $fileName; |
|
|
my $tempName = "$fileName.tbl"; |
|
|
open TABLEOUT, '>', $tempName; |
|
|
# Loop through the file. |
|
|
while (<TABLEIN>) { |
|
|
# Chop off the new-line character. |
|
|
my $record = $_; |
|
|
chomp $record; |
|
|
# Only proceed if the record is non-blank. |
|
|
if ($record) { |
|
|
# Escape all the backslashes found in the line. |
|
|
$record =~ s/\\/\\\\/g; |
|
|
# Eliminate any trailing tabs. |
|
|
chop $record while substr($record, -1) eq "\t"; |
|
|
# If this is a primary relation, add a 0 for the new-record flag (indicating that |
|
|
# this record is not new, but part of the original load). |
|
|
if ($primary) { |
|
|
$record .= "\t0"; |
|
|
} |
|
|
# Write the record. |
|
|
print TABLEOUT "$record\n"; |
|
|
# Count the record read. |
|
|
my $count = $retVal->Add('records'); |
|
|
my $len = length $record; |
|
|
Trace("Record $count written with $len characters.") if T(4); |
|
|
} |
|
|
} |
|
|
# Close the files. |
|
|
close TABLEIN; |
|
|
close TABLEOUT; |
|
|
Trace("Temporary file $tempName created.") if T(4); |
|
1654 |
# Load the table. |
# Load the table. |
1655 |
my $rv; |
my $rv; |
1656 |
eval { |
eval { |
1657 |
$rv = $dbh->load_table(file => $tempName, tbl => $relationName); |
$rv = $dbh->load_table(file => $fileName, tbl => $relationName); |
1658 |
}; |
}; |
1659 |
if (!defined $rv) { |
if (!defined $rv) { |
1660 |
$retVal->AddMessage($@) if ($@); |
$retVal->AddMessage($@) if ($@); |
1661 |
$retVal->AddMessage("Table load failed for $relationName using $tempName."); |
$retVal->AddMessage("Table load failed for $relationName using $fileName."); |
1662 |
Trace("Table load failed for $relationName.") if T(1); |
Trace("Table load failed for $relationName.") if T(1); |
1663 |
} else { |
} else { |
1664 |
# Here we successfully loaded the table. Trace the number of records loaded. |
# Here we successfully loaded the table. |
1665 |
Trace("$retVal->{records} records read for $relationName.") if T(1); |
$retVal->Add("tables"); |
1666 |
|
my $size = -s $fileName; |
1667 |
|
Trace("$size bytes loaded into $relationName.") if T(2); |
1668 |
# If we're rebuilding, we need to create the table indexes. |
# If we're rebuilding, we need to create the table indexes. |
1669 |
if ($truncateFlag) { |
if ($truncateFlag && ! $dbh->{_preIndex}) { |
1670 |
eval { |
eval { |
1671 |
$self->CreateIndex($relationName); |
$self->CreateIndex($relationName); |
1672 |
}; |
}; |
1675 |
} |
} |
1676 |
} |
} |
1677 |
} |
} |
1678 |
# Commit the database changes. |
# Analyze the table to improve performance. |
1679 |
$dbh->commit_tran; |
$dbh->vacuum_it($relationName); |
|
# Delete the temporary file. |
|
|
unlink $tempName; |
|
1680 |
# Return the statistics. |
# Return the statistics. |
1681 |
return $retVal; |
return $retVal; |
1682 |
} |
} |
1683 |
|
|
1684 |
=head3 GenerateEntity |
=head3 GenerateEntity |
1685 |
|
|
1686 |
C<< my $fieldHash = $database->GenerateEntity($id, $type, \%values); >> |
C<< my $fieldHash = $erdb->GenerateEntity($id, $type, \%values); >> |
1687 |
|
|
1688 |
Generate the data for a new entity instance. This method creates a field hash suitable for |
Generate the data for a new entity instance. This method creates a field hash suitable for |
1689 |
passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest |
passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest |
1741 |
|
|
1742 |
=head3 GetEntity |
=head3 GetEntity |
1743 |
|
|
1744 |
C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >> |
C<< my $entityObject = $erdb->GetEntity($entityType, $ID); >> |
1745 |
|
|
1746 |
Return an object describing the entity instance with a specified ID. |
Return an object describing the entity instance with a specified ID. |
1747 |
|
|
1777 |
|
|
1778 |
=head3 GetEntityValues |
=head3 GetEntityValues |
1779 |
|
|
1780 |
C<< my @values = GetEntityValues($entityType, $ID, \@fields); >> |
C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >> |
1781 |
|
|
1782 |
Return a list of values from a specified entity instance. |
Return a list of values from a specified entity instance. |
1783 |
|
|
1818 |
return @retVal; |
return @retVal; |
1819 |
} |
} |
1820 |
|
|
1821 |
|
=head3 GetAll |
1822 |
|
|
1823 |
|
C<< my @list = $erdb->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >> |
1824 |
|
|
1825 |
|
Return a list of values taken from the objects returned by a query. The first three |
1826 |
|
parameters correspond to the parameters of the L</Get> method. The final parameter is |
1827 |
|
a list of the fields desired from each record found by the query. The field name |
1828 |
|
syntax is the standard syntax used for fields in the B<ERDB> system-- |
1829 |
|
B<I<objectName>(I<fieldName>)>-- where I<objectName> is the name of the relevant entity |
1830 |
|
or relationship and I<fieldName> is the name of the field. |
1831 |
|
|
1832 |
|
The list returned will be a list of lists. Each element of the list will contain |
1833 |
|
the values returned for the fields specified in the fourth parameter. If one of the |
1834 |
|
fields specified returns multiple values, they are flattened in with the rest. For |
1835 |
|
example, the following call will return a list of the features in a particular |
1836 |
|
spreadsheet cell, and each feature will be represented by a list containing the |
1837 |
|
feature ID followed by all of its aliases. |
1838 |
|
|
1839 |
|
C<< $query = $erdb->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >> |
1840 |
|
|
1841 |
|
=over 4 |
1842 |
|
|
1843 |
|
=item objectNames |
1844 |
|
|
1845 |
|
List containing the names of the entity and relationship objects to be retrieved. |
1846 |
|
|
1847 |
|
=item filterClause |
1848 |
|
|
1849 |
|
WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can |
1850 |
|
be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form |
1851 |
|
B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the |
1852 |
|
parameter list as additional parameters. The fields in a filter clause can come from primary |
1853 |
|
entity relations, relationship relations, or secondary entity relations; however, all of the |
1854 |
|
entities and relationships involved must be included in the list of object names. |
1855 |
|
|
1856 |
|
=item parameterList |
1857 |
|
|
1858 |
|
List of the parameters to be substituted in for the parameters marks in the filter clause. |
1859 |
|
|
1860 |
|
=item fields |
1861 |
|
|
1862 |
|
List of the fields to be returned in each element of the list returned. |
1863 |
|
|
1864 |
|
=item count |
1865 |
|
|
1866 |
|
Maximum number of records to return. If omitted or 0, all available records will be returned. |
1867 |
|
|
1868 |
|
=item RETURN |
1869 |
|
|
1870 |
|
Returns a list of list references. Each element of the return list contains the values for the |
1871 |
|
fields specified in the B<fields> parameter. |
1872 |
|
|
1873 |
|
=back |
1874 |
|
|
1875 |
|
=cut |
1876 |
|
#: Return Type @@; |
1877 |
|
sub GetAll { |
1878 |
|
# Get the parameters. |
1879 |
|
my ($self, $objectNames, $filterClause, $parameterList, $fields, $count) = @_; |
1880 |
|
# Translate the parameters from a list reference to a list. If the parameter |
1881 |
|
# list is a scalar we convert it into a singleton list. |
1882 |
|
my @parmList = (); |
1883 |
|
if (ref $parameterList eq "ARRAY") { |
1884 |
|
@parmList = @{$parameterList}; |
1885 |
|
} else { |
1886 |
|
push @parmList, $parameterList; |
1887 |
|
} |
1888 |
|
# Insure the counter has a value. |
1889 |
|
if (!defined $count) { |
1890 |
|
$count = 0; |
1891 |
|
} |
1892 |
|
# Add the row limit to the filter clause. |
1893 |
|
if ($count > 0) { |
1894 |
|
$filterClause .= " LIMIT $count"; |
1895 |
|
} |
1896 |
|
# Create the query. |
1897 |
|
my $query = $self->Get($objectNames, $filterClause, @parmList); |
1898 |
|
# Set up a counter of the number of records read. |
1899 |
|
my $fetched = 0; |
1900 |
|
# Loop through the records returned, extracting the fields. Note that if the |
1901 |
|
# counter is non-zero, we stop when the number of records read hits the count. |
1902 |
|
my @retVal = (); |
1903 |
|
while (($count == 0 || $fetched < $count) && (my $row = $query->Fetch())) { |
1904 |
|
my @rowData = $row->Values($fields); |
1905 |
|
push @retVal, \@rowData; |
1906 |
|
$fetched++; |
1907 |
|
} |
1908 |
|
# Return the resulting list. |
1909 |
|
return @retVal; |
1910 |
|
} |
1911 |
|
|
1912 |
|
=head3 EstimateRowSize |
1913 |
|
|
1914 |
|
C<< my $rowSize = $erdb->EstimateRowSize($relName); >> |
1915 |
|
|
1916 |
|
Estimate the row size of the specified relation. The estimated row size is computed by adding |
1917 |
|
up the average length for each data type. |
1918 |
|
|
1919 |
|
=over 4 |
1920 |
|
|
1921 |
|
=item relName |
1922 |
|
|
1923 |
|
Name of the relation whose estimated row size is desired. |
1924 |
|
|
1925 |
|
=item RETURN |
1926 |
|
|
1927 |
|
Returns an estimate of the row size for the specified relation. |
1928 |
|
|
1929 |
|
=back |
1930 |
|
|
1931 |
|
=cut |
1932 |
|
#: Return Type $; |
1933 |
|
sub EstimateRowSize { |
1934 |
|
# Get the parameters. |
1935 |
|
my ($self, $relName) = @_; |
1936 |
|
# Declare the return variable. |
1937 |
|
my $retVal = 0; |
1938 |
|
# Find the relation descriptor. |
1939 |
|
my $relation = $self->_FindRelation($relName); |
1940 |
|
# Get the list of fields. |
1941 |
|
for my $fieldData (@{$relation->{Fields}}) { |
1942 |
|
# Get the field type and add its length. |
1943 |
|
my $fieldLen = $TypeTable{$fieldData->{type}}->{avgLen}; |
1944 |
|
$retVal += $fieldLen; |
1945 |
|
} |
1946 |
|
# Return the result. |
1947 |
|
return $retVal; |
1948 |
|
} |
1949 |
|
|
1950 |
|
=head3 GetFieldTable |
1951 |
|
|
1952 |
|
C<< my $fieldHash = $self->GetFieldTable($objectnName); >> |
1953 |
|
|
1954 |
|
Get the field structure for a specified entity or relationship. |
1955 |
|
|
1956 |
|
=over 4 |
1957 |
|
|
1958 |
|
=item objectName |
1959 |
|
|
1960 |
|
Name of the desired entity or relationship. |
1961 |
|
|
1962 |
|
=item RETURN |
1963 |
|
|
1964 |
|
The table containing the field descriptors for the specified object. |
1965 |
|
|
1966 |
|
=back |
1967 |
|
|
1968 |
|
=cut |
1969 |
|
|
1970 |
|
sub GetFieldTable { |
1971 |
|
# Get the parameters. |
1972 |
|
my ($self, $objectName) = @_; |
1973 |
|
# Get the descriptor from the metadata. |
1974 |
|
my $objectData = $self->_GetStructure($objectName); |
1975 |
|
# Return the object's field table. |
1976 |
|
return $objectData->{Fields}; |
1977 |
|
} |
1978 |
|
|
1979 |
|
=head3 GetUsefulCrossValues |
1980 |
|
|
1981 |
|
C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >> |
1982 |
|
|
1983 |
|
Return a list of the useful attributes that would be returned by a B<Cross> call |
1984 |
|
from an entity of the source entity type through the specified relationship. This |
1985 |
|
means it will return the fields of the target entity type and the intersection data |
1986 |
|
fields in the relationship. Only primary table fields are returned. In other words, |
1987 |
|
the field names returned will be for fields where there is always one and only one |
1988 |
|
value. |
1989 |
|
|
1990 |
|
=over 4 |
1991 |
|
|
1992 |
|
=item sourceEntity |
1993 |
|
|
1994 |
|
Name of the entity from which the relationship crossing will start. |
1995 |
|
|
1996 |
|
=item relationship |
1997 |
|
|
1998 |
|
Name of the relationship being crossed. |
1999 |
|
|
2000 |
|
=item RETURN |
2001 |
|
|
2002 |
|
Returns a list of field names in Sprout field format (I<objectName>C<(>I<fieldName>C<)>. |
2003 |
|
|
2004 |
|
=back |
2005 |
|
|
2006 |
|
=cut |
2007 |
|
#: Return Type @; |
2008 |
|
sub GetUsefulCrossValues { |
2009 |
|
# Get the parameters. |
2010 |
|
my ($self, $sourceEntity, $relationship) = @_; |
2011 |
|
# Declare the return variable. |
2012 |
|
my @retVal = (); |
2013 |
|
# Determine the target entity for the relationship. This is whichever entity is not |
2014 |
|
# the source entity. So, if the source entity is the FROM, we'll get the name of |
2015 |
|
# the TO, and vice versa. |
2016 |
|
my $relStructure = $self->_GetStructure($relationship); |
2017 |
|
my $targetEntityType = ($relStructure->{from} eq $sourceEntity ? "to" : "from"); |
2018 |
|
my $targetEntity = $relStructure->{$targetEntityType}; |
2019 |
|
# Get the field table for the entity. |
2020 |
|
my $entityFields = $self->GetFieldTable($targetEntity); |
2021 |
|
# The field table is a hash. The hash key is the field name. The hash value is a structure. |
2022 |
|
# For the entity fields, the key aspect of the target structure is that the {relation} value |
2023 |
|
# must match the entity name. |
2024 |
|
my @fieldList = map { "$targetEntity($_)" } grep { $entityFields->{$_}->{relation} eq $targetEntity } |
2025 |
|
keys %{$entityFields}; |
2026 |
|
# Push the fields found onto the return variable. |
2027 |
|
push @retVal, sort @fieldList; |
2028 |
|
# Get the field table for the relationship. |
2029 |
|
my $relationshipFields = $self->GetFieldTable($relationship); |
2030 |
|
# Here we have a different rule. We want all the fields other than "from-link" and "to-link". |
2031 |
|
# This may end up being an empty set. |
2032 |
|
my @fieldList2 = map { "$relationship($_)" } grep { $_ ne "from-link" && $_ ne "to-link" } |
2033 |
|
keys %{$relationshipFields}; |
2034 |
|
# Push these onto the return list. |
2035 |
|
push @retVal, sort @fieldList2; |
2036 |
|
# Return the result. |
2037 |
|
return @retVal; |
2038 |
|
} |
2039 |
|
|
2040 |
=head2 Internal Utility Methods |
=head2 Internal Utility Methods |
2041 |
|
|
2042 |
=head3 GetLoadStats |
=head3 GetLoadStats |
2048 |
=cut |
=cut |
2049 |
|
|
2050 |
sub _GetLoadStats { |
sub _GetLoadStats { |
2051 |
return Stats->new('records'); |
return Stats->new(); |
2052 |
} |
} |
2053 |
|
|
2054 |
=head3 GenerateFields |
=head3 GenerateFields |
2243 |
return $objectData->{Relations}; |
return $objectData->{Relations}; |
2244 |
} |
} |
2245 |
|
|
|
=head3 GetFieldTable |
|
|
|
|
|
Get the field structure for a specified entity or relationship. |
|
|
|
|
|
This is an instance method. |
|
|
|
|
|
=over 4 |
|
|
|
|
|
=item objectName |
|
|
|
|
|
Name of the desired entity or relationship. |
|
|
|
|
|
=item RETURN |
|
|
|
|
|
The table containing the field descriptors for the specified object. |
|
|
|
|
|
=back |
|
|
|
|
|
=cut |
|
|
|
|
|
sub _GetFieldTable { |
|
|
# Get the parameters. |
|
|
my ($self, $objectName) = @_; |
|
|
# Get the descriptor from the metadata. |
|
|
my $objectData = $self->_GetStructure($objectName); |
|
|
# Return the object's field table. |
|
|
return $objectData->{Fields}; |
|
|
} |
|
|
|
|
2246 |
=head3 ValidateFieldNames |
=head3 ValidateFieldNames |
2247 |
|
|
2248 |
Determine whether or not the field names are valid. A description of the problems with the names |
Determine whether or not the field names are valid. A description of the problems with the names |
2383 |
sub _LoadMetaData { |
sub _LoadMetaData { |
2384 |
# Get the parameters. |
# Get the parameters. |
2385 |
my ($filename) = @_; |
my ($filename) = @_; |
2386 |
|
Trace("Reading Sprout DBD from $filename.") if T(2); |
2387 |
# Slurp the XML file into a variable. Extensive use of options is used to insure we |
# Slurp the XML file into a variable. Extensive use of options is used to insure we |
2388 |
# get the exact structure we want. |
# get the exact structure we want. |
2389 |
my $metadata = XML::Simple::XMLin($filename, |
my $metadata = XML::Simple::XMLin($filename, |
2411 |
for my $entityName (keys %{$entityList}) { |
for my $entityName (keys %{$entityList}) { |
2412 |
my $entityStructure = $entityList->{$entityName}; |
my $entityStructure = $entityList->{$entityName}; |
2413 |
# |
# |
2414 |
# The first step is to run creating all the entity's default values. For C<Field> elements, |
# The first step is to create all the entity's default values. For C<Field> elements, |
2415 |
# the relation name must be added where it is not specified. For relationships, |
# the relation name must be added where it is not specified. For relationships, |
2416 |
# the B<from-link> and B<to-link> fields must be inserted, and for entities an B<id> |
# the B<from-link> and B<to-link> fields must be inserted, and for entities an B<id> |
2417 |
# field must be added to each relation. Finally, each field will have a C<PrettySort> attribute |
# field must be added to each relation. Finally, each field will have a C<PrettySort> attribute |
2590 |
my @fromList = (); |
my @fromList = (); |
2591 |
my @toList = (); |
my @toList = (); |
2592 |
my @bothList = (); |
my @bothList = (); |
2593 |
Trace("Join table build for $entityName.") if T(3); |
Trace("Join table build for $entityName.") if T(metadata => 4); |
2594 |
for my $relationshipName (keys %{$relationshipList}) { |
for my $relationshipName (keys %{$relationshipList}) { |
2595 |
my $relationship = $relationshipList->{$relationshipName}; |
my $relationship = $relationshipList->{$relationshipName}; |
2596 |
# Determine if this relationship has our entity in one of its link fields. |
# Determine if this relationship has our entity in one of its link fields. |
2597 |
my $fromEntity = $relationship->{from}; |
my $fromEntity = $relationship->{from}; |
2598 |
my $toEntity = $relationship->{to}; |
my $toEntity = $relationship->{to}; |
2599 |
Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(3); |
Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(4); |
2600 |
if ($fromEntity eq $entityName) { |
if ($fromEntity eq $entityName) { |
2601 |
if ($toEntity eq $entityName) { |
if ($toEntity eq $entityName) { |
2602 |
# Here the relationship is recursive. |
# Here the relationship is recursive. |
2603 |
push @bothList, $relationshipName; |
push @bothList, $relationshipName; |
2604 |
Trace("Relationship $relationshipName put in both-list.") if T(3); |
Trace("Relationship $relationshipName put in both-list.") if T(metadata => 4); |
2605 |
} else { |
} else { |
2606 |
# Here the relationship comes from the entity. |
# Here the relationship comes from the entity. |
2607 |
push @fromList, $relationshipName; |
push @fromList, $relationshipName; |
2608 |
Trace("Relationship $relationshipName put in from-list.") if T(3); |
Trace("Relationship $relationshipName put in from-list.") if T(metadata => 4); |
2609 |
} |
} |
2610 |
} elsif ($toEntity eq $entityName) { |
} elsif ($toEntity eq $entityName) { |
2611 |
# Here the relationship goes to the entity. |
# Here the relationship goes to the entity. |
2612 |
push @toList, $relationshipName; |
push @toList, $relationshipName; |
2613 |
Trace("Relationship $relationshipName put in to-list.") if T(3); |
Trace("Relationship $relationshipName put in to-list.") if T(metadata => 4); |
2614 |
} |
} |
2615 |
} |
} |
2616 |
# Create the nonrecursive joins. Note that we build two hashes for running |
# Create the nonrecursive joins. Note that we build two hashes for running |
2626 |
# Create joins between the entity and this relationship. |
# Create joins between the entity and this relationship. |
2627 |
my $linkField = "$relationshipName.${linkType}_link"; |
my $linkField = "$relationshipName.${linkType}_link"; |
2628 |
my $joinClause = "$entityName.id = $linkField"; |
my $joinClause = "$entityName.id = $linkField"; |
2629 |
Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(4); |
Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(metadata => 4); |
2630 |
$joinTable{"$entityName/$relationshipName"} = $joinClause; |
$joinTable{"$entityName/$relationshipName"} = $joinClause; |
2631 |
$joinTable{"$relationshipName/$entityName"} = $joinClause; |
$joinTable{"$relationshipName/$entityName"} = $joinClause; |
2632 |
# Create joins between this relationship and the other relationships. |
# Create joins between this relationship and the other relationships. |
2647 |
# relationship and itself are prohibited. |
# relationship and itself are prohibited. |
2648 |
my $relJoinClause = "$otherName.${otherType}_link = $linkField"; |
my $relJoinClause = "$otherName.${otherType}_link = $linkField"; |
2649 |
$joinTable{$joinKey} = $relJoinClause; |
$joinTable{$joinKey} = $relJoinClause; |
2650 |
Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(4); |
Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(metadata => 4); |
2651 |
} |
} |
2652 |
} |
} |
2653 |
} |
} |
2656 |
# relationship can only be ambiguous with another recursive relationship, |
# relationship can only be ambiguous with another recursive relationship, |
2657 |
# and the incoming relationship from the outer loop is never recursive. |
# and the incoming relationship from the outer loop is never recursive. |
2658 |
for my $otherName (@bothList) { |
for my $otherName (@bothList) { |
2659 |
Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(3); |
Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(metadata => 4); |
2660 |
# Join from the left. |
# Join from the left. |
2661 |
$joinTable{"$relationshipName/$otherName"} = |
$joinTable{"$relationshipName/$otherName"} = |
2662 |
"$linkField = $otherName.from_link"; |
"$linkField = $otherName.from_link"; |
2671 |
# rise to situations where we can't create the path we want; however, it is always |
# rise to situations where we can't create the path we want; however, it is always |
2672 |
# possible to get the same effect using multiple queries. |
# possible to get the same effect using multiple queries. |
2673 |
for my $relationshipName (@bothList) { |
for my $relationshipName (@bothList) { |
2674 |
Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(3); |
Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(metadata => 4); |
2675 |
# Join to the entity from each direction. |
# Join to the entity from each direction. |
2676 |
$joinTable{"$entityName/$relationshipName"} = |
$joinTable{"$entityName/$relationshipName"} = |
2677 |
"$entityName.id = $relationshipName.from_link"; |
"$entityName.id = $relationshipName.from_link"; |
2722 |
# index descriptor does not exist, it will be created automatically so we can add |
# index descriptor does not exist, it will be created automatically so we can add |
2723 |
# the field to it. |
# the field to it. |
2724 |
unshift @{$newIndex->{IndexFields}}, $firstField; |
unshift @{$newIndex->{IndexFields}}, $firstField; |
2725 |
|
# If this is a one-to-many relationship, the "To" index is unique. |
2726 |
|
if ($relationshipStructure->{arity} eq "1M" && $indexKey eq "To") { |
2727 |
|
$newIndex->{Unique} = 'true'; |
2728 |
|
} |
2729 |
# Add the index to the relation. |
# Add the index to the relation. |
2730 |
_AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex); |
_AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex); |
2731 |
} |
} |
2817 |
# Here we have a field list. Loop through its fields. |
# Here we have a field list. Loop through its fields. |
2818 |
my $fieldStructures = $structure->{Fields}; |
my $fieldStructures = $structure->{Fields}; |
2819 |
for my $fieldName (keys %{$fieldStructures}) { |
for my $fieldName (keys %{$fieldStructures}) { |
2820 |
|
Trace("Processing field $fieldName of $defaultRelationName.") if T(4); |
2821 |
my $fieldData = $fieldStructures->{$fieldName}; |
my $fieldData = $fieldStructures->{$fieldName}; |
2822 |
# Get the field type. |
# Get the field type. |
2823 |
my $type = $fieldData->{type}; |
my $type = $fieldData->{type}; |