9 |
use DBObject; |
use DBObject; |
10 |
use Stats; |
use Stats; |
11 |
use Time::HiRes qw(gettimeofday); |
use Time::HiRes qw(gettimeofday); |
12 |
|
use Digest::MD5 qw(md5_base64); |
13 |
use FIG; |
use FIG; |
14 |
|
|
15 |
=head1 Entity-Relationship Database Package |
=head1 Entity-Relationship Database Package |
110 |
compatibility with certain database packages), but the only values supported are |
compatibility with certain database packages), but the only values supported are |
111 |
0 and 1. |
0 and 1. |
112 |
|
|
113 |
|
=item id-string |
114 |
|
|
115 |
|
variable-length string, maximum 25 characters |
116 |
|
|
117 |
=item key-string |
=item key-string |
118 |
|
|
119 |
variable-length string, maximum 40 characters |
variable-length string, maximum 40 characters |
130 |
|
|
131 |
variable-length string, maximum 255 characters |
variable-length string, maximum 255 characters |
132 |
|
|
133 |
|
=item hash-string |
134 |
|
|
135 |
|
variable-length string, maximum 22 characters |
136 |
|
|
137 |
=back |
=back |
138 |
|
|
139 |
|
The hash-string data type has a special meaning. The actual key passed into the loader will |
140 |
|
be a string, but it will be digested into a 22-character MD5 code to save space. Although the |
141 |
|
MD5 algorithm is not perfect, it is extremely unlikely two strings will have the same |
142 |
|
digest. Therefore, it is presumed the keys will be unique. When the database is actually |
143 |
|
in use, the hashed keys will be presented rather than the original values. For this reason, |
144 |
|
they should not be used for entities where the key is meaningful. |
145 |
|
|
146 |
=head3 Global Tags |
=head3 Global Tags |
147 |
|
|
148 |
The entire database definition must be inside a B<Database> tag. The display name of |
The entire database definition must be inside a B<Database> tag. The display name of |
326 |
date => { sqlType => 'BIGINT', maxLen => 80, avgLen => 8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" }, |
date => { sqlType => 'BIGINT', maxLen => 80, avgLen => 8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" }, |
327 |
float => { sqlType => 'DOUBLE PRECISION', maxLen => 40, avgLen => 8, dataGen => "FloatGen(0.0, 100.0)" }, |
float => { sqlType => 'DOUBLE PRECISION', maxLen => 40, avgLen => 8, dataGen => "FloatGen(0.0, 100.0)" }, |
328 |
boolean => { sqlType => 'SMALLINT', maxLen => 1, avgLen => 1, dataGen => "IntGen(0, 1)" }, |
boolean => { sqlType => 'SMALLINT', maxLen => 1, avgLen => 1, dataGen => "IntGen(0, 1)" }, |
329 |
|
'hash-string' => |
330 |
|
{ sqlType => 'VARCHAR(22)', maxLen => 22, avgLen => 22, dataGen => "SringGen(22)" }, |
331 |
|
'id-string' => |
332 |
|
{ sqlType => 'VARCHAR(25)', maxLen => 25, avgLen => 25, dataGen => "SringGen(22)" }, |
333 |
'key-string' => |
'key-string' => |
334 |
{ sqlType => 'VARCHAR(40)', maxLen => 40, avgLen => 10, dataGen => "StringGen(IntGen(10,40))" }, |
{ sqlType => 'VARCHAR(40)', maxLen => 40, avgLen => 10, dataGen => "StringGen(IntGen(10,40))" }, |
335 |
'name-string' => |
'name-string' => |
422 |
# Write the HTML heading stuff. |
# Write the HTML heading stuff. |
423 |
print HTMLOUT "<html>\n<head>\n<title>$title</title>\n"; |
print HTMLOUT "<html>\n<head>\n<title>$title</title>\n"; |
424 |
print HTMLOUT "</head>\n<body>\n"; |
print HTMLOUT "</head>\n<body>\n"; |
425 |
|
# Write the documentation. |
426 |
|
print HTMLOUT $self->DisplayMetaData(); |
427 |
|
# Close the document. |
428 |
|
print HTMLOUT "</body>\n</html>\n"; |
429 |
|
# Close the file. |
430 |
|
close HTMLOUT; |
431 |
|
} |
432 |
|
|
433 |
|
=head3 DisplayMetaData |
434 |
|
|
435 |
|
C<< my $html = $erdb->DisplayMetaData(); >> |
436 |
|
|
437 |
|
Return an HTML description of the database. This description can be used to help users create |
438 |
|
the data to be loaded into the relations and form queries. The output is raw includable HTML |
439 |
|
without any HEAD or BODY tags. |
440 |
|
|
441 |
|
=over 4 |
442 |
|
|
443 |
|
=item RETURN |
444 |
|
|
445 |
|
Returns a string containing the HTML description of the database. |
446 |
|
|
447 |
|
=back |
448 |
|
|
449 |
|
=cut |
450 |
|
|
451 |
|
sub DisplayMetaData { |
452 |
|
# Get the parameters. |
453 |
|
my ($self) = @_; |
454 |
|
# Get the metadata and the title string. |
455 |
|
my $metadata = $self->{_metaData}; |
456 |
|
# Get the title string. |
457 |
|
my $title = $metadata->{Title}; |
458 |
|
# Get the entity and relationship lists. |
459 |
|
my $entityList = $metadata->{Entities}; |
460 |
|
my $relationshipList = $metadata->{Relationships}; |
461 |
|
# Declare the return variable. |
462 |
|
my $retVal = ""; |
463 |
|
# Open the output file. |
464 |
|
Trace("Building MetaData table of contents.") if T(4); |
465 |
# Here we do the table of contents. It starts as an unordered list of section names. Each |
# Here we do the table of contents. It starts as an unordered list of section names. Each |
466 |
# section contains an ordered list of entity or relationship subsections. |
# section contains an ordered list of entity or relationship subsections. |
467 |
print HTMLOUT "<ul>\n<li><a href=\"#EntitiesSection\">Entities</a>\n<ol>\n"; |
$retVal .= "<ul>\n<li><a href=\"#EntitiesSection\">Entities</a>\n<ol>\n"; |
468 |
# Loop through the Entities, displaying a list item for each. |
# Loop through the Entities, displaying a list item for each. |
469 |
foreach my $key (sort keys %{$entityList}) { |
foreach my $key (sort keys %{$entityList}) { |
470 |
# Display this item. |
# Display this item. |
471 |
print HTMLOUT "<li><a href=\"#$key\">$key</a></li>\n"; |
$retVal .= "<li><a href=\"#$key\">$key</a></li>\n"; |
472 |
} |
} |
473 |
# Close off the entity section and start the relationship section. |
# Close off the entity section and start the relationship section. |
474 |
print HTMLOUT "</ol></li>\n<li><a href=\"#RelationshipsSection\">Relationships</a>\n<ol>\n"; |
$retVal .= "</ol></li>\n<li><a href=\"#RelationshipsSection\">Relationships</a>\n<ol>\n"; |
475 |
# Loop through the Relationships. |
# Loop through the Relationships. |
476 |
foreach my $key (sort keys %{$relationshipList}) { |
foreach my $key (sort keys %{$relationshipList}) { |
477 |
# Display this item. |
# Display this item. |
478 |
my $relationshipTitle = _ComputeRelationshipSentence($key, $relationshipList->{$key}); |
my $relationshipTitle = _ComputeRelationshipSentence($key, $relationshipList->{$key}); |
479 |
print HTMLOUT "<li><a href=\"#$key\">$relationshipTitle</a></li>\n"; |
$retVal .= "<li><a href=\"#$key\">$relationshipTitle</a></li>\n"; |
480 |
} |
} |
481 |
# Close off the relationship section and list the join table section. |
# Close off the relationship section and list the join table section. |
482 |
print HTMLOUT "</ol></li>\n<li><a href=\"#JoinTable\">Join Table</a></li>\n"; |
$retVal .= "</ol></li>\n<li><a href=\"#JoinTable\">Join Table</a></li>\n"; |
483 |
# Close off the table of contents itself. |
# Close off the table of contents itself. |
484 |
print HTMLOUT "</ul>\n"; |
$retVal .= "</ul>\n"; |
485 |
# Now we start with the actual data. Denote we're starting the entity section. |
# Now we start with the actual data. Denote we're starting the entity section. |
486 |
print HTMLOUT "<a name=\"EntitiesSection\"></a><h2>Entities</h2>\n"; |
$retVal .= "<a name=\"EntitiesSection\"></a><h2>Entities</h2>\n"; |
487 |
# Loop through the entities. |
# Loop through the entities. |
488 |
for my $key (sort keys %{$entityList}) { |
for my $key (sort keys %{$entityList}) { |
489 |
Trace("Building MetaData entry for $key entity.") if T(4); |
Trace("Building MetaData entry for $key entity.") if T(4); |
490 |
# Create the entity header. It contains a bookmark and the entity name. |
# Create the entity header. It contains a bookmark and the entity name. |
491 |
print HTMLOUT "<a name=\"$key\"></a><h3>$key</h3>\n"; |
$retVal .= "<a name=\"$key\"></a><h3>$key</h3>\n"; |
492 |
# Get the entity data. |
# Get the entity data. |
493 |
my $entityData = $entityList->{$key}; |
my $entityData = $entityList->{$key}; |
494 |
# If there's descriptive text, display it. |
# If there's descriptive text, display it. |
495 |
if (my $notes = $entityData->{Notes}) { |
if (my $notes = $entityData->{Notes}) { |
496 |
print HTMLOUT "<p>" . _HTMLNote($notes->{content}) . "</p>\n"; |
$retVal .= "<p>" . _HTMLNote($notes->{content}) . "</p>\n"; |
497 |
} |
} |
498 |
# Now we want a list of the entity's relationships. First, we set up the relationship subsection. |
# Now we want a list of the entity's relationships. First, we set up the relationship subsection. |
499 |
print HTMLOUT "<h4>Relationships for <b>$key</b></h4>\n<ul>\n"; |
$retVal .= "<h4>Relationships for <b>$key</b></h4>\n<ul>\n"; |
500 |
# Loop through the relationships. |
# Loop through the relationships. |
501 |
for my $relationship (sort keys %{$relationshipList}) { |
for my $relationship (sort keys %{$relationshipList}) { |
502 |
# Get the relationship data. |
# Get the relationship data. |
506 |
# Get the relationship sentence and append the arity. |
# Get the relationship sentence and append the arity. |
507 |
my $relationshipDescription = _ComputeRelationshipSentence($relationship, $relationshipStructure); |
my $relationshipDescription = _ComputeRelationshipSentence($relationship, $relationshipStructure); |
508 |
# Display the relationship data. |
# Display the relationship data. |
509 |
print HTMLOUT "<li><a href=\"#$relationship\">$relationshipDescription</a></li>\n"; |
$retVal .= "<li><a href=\"#$relationship\">$relationshipDescription</a></li>\n"; |
510 |
} |
} |
511 |
} |
} |
512 |
# Close off the relationship list. |
# Close off the relationship list. |
513 |
print HTMLOUT "</ul>\n"; |
$retVal .= "</ul>\n"; |
514 |
# Get the entity's relations. |
# Get the entity's relations. |
515 |
my $relationList = $entityData->{Relations}; |
my $relationList = $entityData->{Relations}; |
516 |
# Create a header for the relation subsection. |
# Create a header for the relation subsection. |
517 |
print HTMLOUT "<h4>Relations for <b>$key</b></h4>\n"; |
$retVal .= "<h4>Relations for <b>$key</b></h4>\n"; |
518 |
# Loop through the relations, displaying them. |
# Loop through the relations, displaying them. |
519 |
for my $relation (sort keys %{$relationList}) { |
for my $relation (sort keys %{$relationList}) { |
520 |
my $htmlString = _ShowRelationTable($relation, $relationList->{$relation}); |
my $htmlString = _ShowRelationTable($relation, $relationList->{$relation}); |
521 |
print HTMLOUT $htmlString; |
$retVal .= $htmlString; |
522 |
} |
} |
523 |
} |
} |
524 |
# Denote we're starting the relationship section. |
# Denote we're starting the relationship section. |
525 |
print HTMLOUT "<a name=\"RelationshipsSection\"></a><h2>Relationships</h2>\n"; |
$retVal .= "<a name=\"RelationshipsSection\"></a><h2>Relationships</h2>\n"; |
526 |
# Loop through the relationships. |
# Loop through the relationships. |
527 |
for my $key (sort keys %{$relationshipList}) { |
for my $key (sort keys %{$relationshipList}) { |
528 |
Trace("Building MetaData entry for $key relationship.") if T(4); |
Trace("Building MetaData entry for $key relationship.") if T(4); |
530 |
my $relationshipStructure = $relationshipList->{$key}; |
my $relationshipStructure = $relationshipList->{$key}; |
531 |
# Create the relationship header. |
# Create the relationship header. |
532 |
my $headerText = _ComputeRelationshipHeading($key, $relationshipStructure); |
my $headerText = _ComputeRelationshipHeading($key, $relationshipStructure); |
533 |
print HTMLOUT "<h3><a name=\"$key\"></a>$headerText</h3>\n"; |
$retVal .= "<h3><a name=\"$key\"></a>$headerText</h3>\n"; |
534 |
# Get the entity names. |
# Get the entity names. |
535 |
my $fromEntity = $relationshipStructure->{from}; |
my $fromEntity = $relationshipStructure->{from}; |
536 |
my $toEntity = $relationshipStructure->{to}; |
my $toEntity = $relationshipStructure->{to}; |
540 |
# since both sentences will say the same thing. |
# since both sentences will say the same thing. |
541 |
my $arity = $relationshipStructure->{arity}; |
my $arity = $relationshipStructure->{arity}; |
542 |
if ($arity eq "11") { |
if ($arity eq "11") { |
543 |
print HTMLOUT "<p>Each <b>$fromEntity</b> relates to at most one <b>$toEntity</b>.\n"; |
$retVal .= "<p>Each <b>$fromEntity</b> relates to at most one <b>$toEntity</b>.\n"; |
544 |
} else { |
} else { |
545 |
print HTMLOUT "<p>Each <b>$fromEntity</b> relates to multiple <b>$toEntity</b>s.\n"; |
$retVal .= "<p>Each <b>$fromEntity</b> relates to multiple <b>$toEntity</b>s.\n"; |
546 |
if ($arity eq "MM" && $fromEntity ne $toEntity) { |
if ($arity eq "MM" && $fromEntity ne $toEntity) { |
547 |
print HTMLOUT "Each <b>$toEntity</b> relates to multiple <b>$fromEntity</b>s.\n"; |
$retVal .= "Each <b>$toEntity</b> relates to multiple <b>$fromEntity</b>s.\n"; |
548 |
} |
} |
549 |
} |
} |
550 |
print HTMLOUT "</p>\n"; |
$retVal .= "</p>\n"; |
551 |
# If there are notes on this relationship, display them. |
# If there are notes on this relationship, display them. |
552 |
if (my $notes = $relationshipStructure->{Notes}) { |
if (my $notes = $relationshipStructure->{Notes}) { |
553 |
print HTMLOUT "<p>" . _HTMLNote($notes->{content}) . "</p>\n"; |
$retVal .= "<p>" . _HTMLNote($notes->{content}) . "</p>\n"; |
554 |
} |
} |
555 |
# Generate the relationship's relation table. |
# Generate the relationship's relation table. |
556 |
my $htmlString = _ShowRelationTable($key, $relationshipStructure->{Relations}->{$key}); |
my $htmlString = _ShowRelationTable($key, $relationshipStructure->{Relations}->{$key}); |
557 |
print HTMLOUT $htmlString; |
$retVal .= $htmlString; |
558 |
} |
} |
559 |
Trace("Building MetaData join table.") if T(4); |
Trace("Building MetaData join table.") if T(4); |
560 |
# Denote we're starting the join table. |
# Denote we're starting the join table. |
561 |
print HTMLOUT "<a name=\"JoinTable\"></a><h3>Join Table</h3>\n"; |
$retVal .= "<a name=\"JoinTable\"></a><h3>Join Table</h3>\n"; |
562 |
# Create a table header. |
# Create a table header. |
563 |
print HTMLOUT _OpenTable("Join Table", "Source", "Target", "Join Condition"); |
$retVal .= _OpenTable("Join Table", "Source", "Target", "Join Condition"); |
564 |
# Loop through the joins. |
# Loop through the joins. |
565 |
my $joinTable = $metadata->{Joins}; |
my $joinTable = $metadata->{Joins}; |
566 |
my @joinKeys = keys %{$joinTable}; |
my @joinKeys = keys %{$joinTable}; |
573 |
my $target = $self->ComputeObjectSentence($targetRelation); |
my $target = $self->ComputeObjectSentence($targetRelation); |
574 |
my $clause = $joinTable->{$joinKey}; |
my $clause = $joinTable->{$joinKey}; |
575 |
# Display them in a table row. |
# Display them in a table row. |
576 |
print HTMLOUT "<tr><td>$source</td><td>$target</td><td>$clause</td></tr>\n"; |
$retVal .= "<tr><td>$source</td><td>$target</td><td>$clause</td></tr>\n"; |
577 |
} |
} |
578 |
# Close the table. |
# Close the table. |
579 |
print HTMLOUT _CloseTable(); |
$retVal .= _CloseTable(); |
580 |
# Close the document. |
Trace("Built MetaData HTML.") if T(3); |
581 |
print HTMLOUT "</body>\n</html>\n"; |
# Return the HTML. |
582 |
# Close the file. |
return $retVal; |
|
close HTMLOUT; |
|
|
Trace("Built MetaData web page.") if T(3); |
|
583 |
} |
} |
584 |
|
|
585 |
=head3 DumpMetaData |
=head3 DumpMetaData |
745 |
return $retVal; |
return $retVal; |
746 |
} |
} |
747 |
|
|
748 |
|
=head3 DigestFields |
749 |
|
|
750 |
|
C<< $erdb->DigestFields($relName, $fieldList); >> |
751 |
|
|
752 |
|
Digest the strings in the field list that correspond to data type C<hash-string> in the |
753 |
|
specified relation. |
754 |
|
|
755 |
|
=over 4 |
756 |
|
|
757 |
|
=item relName |
758 |
|
|
759 |
|
Name of the relation to which the fields belong. |
760 |
|
|
761 |
|
=item fieldList |
762 |
|
|
763 |
|
List of field contents to be loaded into the relation. |
764 |
|
|
765 |
|
=back |
766 |
|
|
767 |
|
=cut |
768 |
|
#: Return Type ; |
769 |
|
sub DigestFields {
    # Unpack the object, the relation name, and the caller's list of field values.
    my ($self, $relName, $fieldList) = @_;
    # Look up the relation and pull out its list of field descriptors.
    my $relation = $self->_FindRelation($relName);
    my $descriptors = $relation->{Fields};
    my $lastIndex = scalar(@{$descriptors}) - 1;
    # Walk the descriptors by position. The value at a given index in the
    # caller's list is paired with the descriptor at the same index.
    for my $idx (0 .. $lastIndex) {
        # Only hash-string fields are digested; all others are left untouched.
        next if $descriptors->[$idx]->{type} ne 'hash-string';
        # Overwrite the caller's value in place with its digest.
        $fieldList->[$idx] = $self->DigestKey($fieldList->[$idx]);
    }
}
787 |
|
|
788 |
|
=head3 DigestKey |
789 |
|
|
790 |
|
C<< my $digested = $erdb->DigestKey($keyValue); >> |
791 |
|
|
792 |
|
Return the digested value of a symbolic key. The digested value can then be plugged into a |
793 |
|
key-based search into a table with key-type hash-string. |
794 |
|
|
795 |
|
Currently the digesting process is independent of the database structure, but that may not |
796 |
|
always be the case, so this is an instance method instead of a static method. |
797 |
|
|
798 |
|
=over 4 |
799 |
|
|
800 |
|
=item keyValue |
801 |
|
|
802 |
|
Key value to digest. |
803 |
|
|
804 |
|
=item RETURN |
805 |
|
|
806 |
|
Digested value of the key. |
807 |
|
|
808 |
|
=back |
809 |
|
|
810 |
|
=cut |
811 |
|
|
812 |
|
sub DigestKey {
    # Get the parameters.
    my ($self, $keyValue) = @_;
    # Work on a copy so the caller's string is never modified.
    my $bytes = $keyValue;
    # MD5 is defined only for strings of bytes, and md5_base64 croaks if it is
    # handed a character above 255. If the key cannot be represented as bytes,
    # digest its UTF-8 encoding instead. Keys that CAN be represented as bytes
    # (including all ASCII keys) take the downgrade path and produce exactly
    # the same digest they always have.
    if (defined $bytes && !utf8::downgrade($bytes, 1)) {
        utf8::encode($bytes);
    }
    # Compute and return the 22-character base-64 digest.
    return md5_base64($bytes);
}
820 |
|
|
821 |
=head3 CreateIndex |
=head3 CreateIndex |
822 |
|
|
823 |
C<< $erdb->CreateIndex($relationName); >> |
C<< $erdb->CreateIndex($relationName); >> |
979 |
|
|
980 |
=head3 Get |
=head3 Get |
981 |
|
|
982 |
C<< my $query = $erdb->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >> |
C<< my $query = $erdb->Get(\@objectNames, $filterClause, \@params); >> |
983 |
|
|
984 |
This method returns a query object for entities of a specified type using a specified filter. |
This method returns a query object for entities of a specified type using a specified filter. |
985 |
The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each |
The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each |
987 |
following call requests all B<Genome> objects for the genus specified in the variable |
following call requests all B<Genome> objects for the genus specified in the variable |
988 |
$genus. |
$genus. |
989 |
|
|
990 |
C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", $genus); >> |
C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", [$genus]); >> |
991 |
|
|
992 |
The WHERE clause contains a single question mark, so there is a single additional |
The WHERE clause contains a single question mark, so there is a single additional |
993 |
parameter representing the parameter value. It would also be possible to code |
parameter representing the parameter value. It would also be possible to code |
1004 |
It is possible to specify multiple entity and relationship names in order to retrieve more than |
It is possible to specify multiple entity and relationship names in order to retrieve more than |
1005 |
one object's data at the same time, which allows highly complex joined queries. For example, |
one object's data at the same time, which allows highly complex joined queries. For example, |
1006 |
|
|
1007 |
C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >> |
C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", [$genus]); >> |
1008 |
|
|
1009 |
If multiple names are specified, then the query processor will automatically determine a |
If multiple names are specified, then the query processor will automatically determine a |
1010 |
join path between the entities and relationships. The algorithm used is very simplistic. |
join path between the entities and relationships. The algorithm used is very simplistic. |
1011 |
In particular, you can't specify any entity or relationship more than once, and if a |
In particular, if a relationship is recursive, the path is determined by the order in which |
1012 |
relationship is recursive, the path is determined by the order in which the entity |
the entity and the relationship appear. For example, consider a recursive relationship |
1013 |
and the relationship appear. For example, consider a recursive relationship B<IsParentOf> |
B<IsParentOf> which relates B<People> objects to other B<People> objects. If the join path is |
|
which relates B<People> objects to other B<People> objects. If the join path is |
|
1014 |
coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however, |
coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however, |
1015 |
the join path is C<['IsParentOf', 'People']>, then the people returned will be children. |
the join path is C<['IsParentOf', 'People']>, then the people returned will be children. |
1016 |
|
|
1017 |
|
If an entity or relationship is mentioned twice, the name for the second occurrence will |
1018 |
|
be suffixed with C<2>, the third occurrence will be suffixed with C<3>, and so forth. So, |
1019 |
|
for example, if we have C<['Feature', 'HasContig', 'Contig', 'HasContig']>, then the |
1020 |
|
B<to-link> field of the first B<HasContig> is specified as C<HasContig(to-link)>, while |
1021 |
|
the B<to-link> field of the second B<HasContig> is specified as C<HasContig2(to-link)>. |
1022 |
|
|
1023 |
=over 4 |
=over 4 |
1024 |
|
|
1025 |
=item objectNames |
=item objectNames |
1049 |
filter clause in general; however, odd things may happen if a sort field is from a secondary |
filter clause in general; however, odd things may happen if a sort field is from a secondary |
1050 |
relation. |
relation. |
1051 |
|
|
1052 |
=item param1, param2, ..., paramN |
Finally, you can limit the number of rows returned by adding a LIMIT clause. The LIMIT must |
1053 |
|
be the last thing in the filter clause, and it contains only the word "LIMIT" followed by |
1054 |
|
a positive number. So, for example |
1055 |
|
|
1056 |
|
C<< "Genome(genus) = ? ORDER BY Genome(species) LIMIT 10" >> |
1057 |
|
|
1058 |
Parameter values to be substituted into the filter clause. |
will only return the first ten genomes for the specified genus. The ORDER BY clause is not |
1059 |
|
required. For example, to just get the first 10 genomes in the B<Genome> table, you could |
1060 |
|
use |
1061 |
|
|
1062 |
|
C<< "LIMIT 10" >> |
1063 |
|
|
1064 |
|
=item params |
1065 |
|
|
1066 |
|
Reference to a list of parameter values to be substituted into the filter clause. |
1067 |
|
|
1068 |
=item RETURN |
=item RETURN |
1069 |
|
|
1075 |
|
|
1076 |
sub Get { |
sub Get { |
1077 |
# Get the parameters. |
# Get the parameters. |
1078 |
my ($self, $objectNames, $filterClause, @params) = @_; |
my ($self, $objectNames, $filterClause, $params) = @_; |
1079 |
# Construct the SELECT statement. The general pattern is |
# Process the SQL stuff. |
1080 |
# |
my ($suffix, $mappedNameListRef, $mappedNameHashRef) = |
1081 |
# SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN |
$self->_SetupSQL($objectNames, $filterClause); |
1082 |
# |
# Create the query. |
1083 |
my $dbh = $self->{_dbh}; |
my $command = "SELECT DISTINCT " . join(".*, ", @{$mappedNameListRef}) . |
1084 |
my $command = "SELECT DISTINCT " . join('.*, ', @{$objectNames}) . ".* FROM " . |
".* $suffix"; |
1085 |
join(', ', @{$objectNames}); |
my $sth = $self->_GetStatementHandle($command, $params); |
1086 |
# Check for a filter clause. |
# Now we create the relation map, which enables DBQuery to determine the order, name |
1087 |
if ($filterClause) { |
# and mapped name for each object in the query. |
1088 |
# Here we have one, so we convert its field names and add it to the query. First, |
my @relationMap = (); |
1089 |
# We create a copy of the filter string we can work with. |
for my $mappedName (@{$mappedNameListRef}) { |
1090 |
my $filterString = $filterClause; |
push @relationMap, [$mappedName, $mappedNameHashRef->{$mappedName}]; |
|
# Next, we sort the object names by length. This helps protect us from finding |
|
|
# object names inside other object names when we're doing our search and replace. |
|
|
my @sortedNames = sort { length($b) - length($a) } @{$objectNames}; |
|
|
# We will also keep a list of conditions to add to the WHERE clause in order to link |
|
|
# entities and relationships as well as primary relations to secondary ones. |
|
|
my @joinWhere = (); |
|
|
# The final preparatory step is to create a hash table of relation names. The |
|
|
# table begins with the relation names already in the SELECT command. |
|
|
my %fromNames = (); |
|
|
for my $objectName (@sortedNames) { |
|
|
$fromNames{$objectName} = 1; |
|
|
} |
|
|
# We are ready to begin. We loop through the object names, replacing each |
|
|
# object name's field references by the corresponding SQL field reference. |
|
|
# Along the way, if we find a secondary relation, we will need to add it |
|
|
# to the FROM clause. |
|
|
for my $objectName (@sortedNames) { |
|
|
# Get the length of the object name plus 2. This is the value we add to the |
|
|
# size of the field name to determine the size of the field reference as a |
|
|
# whole. |
|
|
my $nameLength = 2 + length $objectName; |
|
|
# Get the object's field list. |
|
|
my $fieldList = $self->GetFieldTable($objectName); |
|
|
# Find the field references for this object. |
|
|
while ($filterString =~ m/$objectName\(([^)]*)\)/g) { |
|
|
# At this point, $1 contains the field name, and the current position |
|
|
# is set immediately after the final parenthesis. We pull out the name of |
|
|
# the field and the position and length of the field reference as a whole. |
|
|
my $fieldName = $1; |
|
|
my $len = $nameLength + length $fieldName; |
|
|
my $pos = pos($filterString) - $len; |
|
|
# Insure the field exists. |
|
|
if (!exists $fieldList->{$fieldName}) { |
|
|
Confess("Field $fieldName not found for object $objectName."); |
|
|
} else { |
|
|
# Get the field's relation. |
|
|
my $relationName = $fieldList->{$fieldName}->{relation}; |
|
|
# Insure the relation is in the FROM clause. |
|
|
if (!exists $fromNames{$relationName}) { |
|
|
# Add the relation to the FROM clause. |
|
|
$command .= ", $relationName"; |
|
|
# Create its join sub-clause. |
|
|
push @joinWhere, "$objectName.id = $relationName.id"; |
|
|
# Denote we have it available for future fields. |
|
|
$fromNames{$relationName} = 1; |
|
|
} |
|
|
# Form an SQL field reference from the relation name and the field name. |
|
|
my $sqlReference = "$relationName." . _FixName($fieldName); |
|
|
# Put it into the filter string in place of the old value. |
|
|
substr($filterString, $pos, $len) = $sqlReference; |
|
|
# Reposition the search. |
|
|
pos $filterString = $pos + length $sqlReference; |
|
|
} |
|
|
} |
|
|
} |
|
|
# The next step is to join the objects together. We only need to do this if there |
|
|
# is more than one object in the object list. We start with the first object and |
|
|
# run through the objects after it. Note also that we make a safety copy of the |
|
|
# list before running through it. |
|
|
my @objectList = @{$objectNames}; |
|
|
my $lastObject = shift @objectList; |
|
|
# Get the join table. |
|
|
my $joinTable = $self->{_metaData}->{Joins}; |
|
|
# Loop through the object list. |
|
|
for my $thisObject (@objectList) { |
|
|
# Look for a join. |
|
|
my $joinKey = "$lastObject/$thisObject"; |
|
|
if (!exists $joinTable->{$joinKey}) { |
|
|
# Here there's no join, so we throw an error. |
|
|
Confess("No join exists to connect from $lastObject to $thisObject."); |
|
|
} else { |
|
|
# Get the join clause and add it to the WHERE list. |
|
|
push @joinWhere, $joinTable->{$joinKey}; |
|
|
# Save this object as the last object for the next iteration. |
|
|
$lastObject = $thisObject; |
|
|
} |
|
|
} |
|
|
# Now we need to handle the whole ORDER BY / LIMIT thing. The important part |
|
|
# here is we want the filter clause to be empty if there's no WHERE filter. |
|
|
# We'll put the ORDER BY / LIMIT clauses in the following variable. |
|
|
my $orderClause = ""; |
|
|
# Locate the ORDER BY or LIMIT verbs (if any). We use a non-greedy |
|
|
# operator so that we find the first occurrence of either verb. |
|
|
if ($filterString =~ m/^(.*?)\s*(ORDER BY|LIMIT)/g) { |
|
|
# Here we have an ORDER BY or LIMIT verb. Split it off of the filter string. |
|
|
my $pos = pos $filterString; |
|
|
$orderClause = $2 . substr($filterString, $pos); |
|
|
$filterString = $1; |
|
|
} |
|
|
# Add the filter and the join clauses (if any) to the SELECT command. |
|
|
if ($filterString) { |
|
|
push @joinWhere, "($filterString)"; |
|
|
} |
|
|
if (@joinWhere) { |
|
|
$command .= " WHERE " . join(' AND ', @joinWhere); |
|
|
} |
|
|
# Add the sort or limit clause (if any) to the SELECT command. |
|
|
if ($orderClause) { |
|
|
$command .= " $orderClause"; |
|
|
} |
|
1091 |
} |
} |
|
Trace("SQL query: $command") if T(SQL => 4); |
|
|
Trace("PARMS: '" . (join "', '", @params) . "'") if (T(SQL => 4) && (@params > 0)); |
|
|
my $sth = $dbh->prepare_command($command); |
|
|
# Execute it with the parameters bound in. |
|
|
$sth->execute(@params) || Confess("SELECT error" . $sth->errstr()); |
|
1092 |
# Return the statement object. |
# Return the statement object. |
1093 |
my $retVal = DBQuery::_new($self, $sth, @{$objectNames}); |
my $retVal = DBQuery::_new($self, $sth, \@relationMap); |
1094 |
return $retVal; |
return $retVal; |
1095 |
} |
} |
1096 |
|
|
1097 |
|
=head3 GetFlat |
1098 |
|
|
1099 |
|
C<< my @list = $erdb->GetFlat(\@objectNames, $filterClause, \@parameterList, $field); >> |
1100 |
|
|
1101 |
|
This is a variation of L</GetAll> that asks for only a single field per record and |
1102 |
|
returns a single flattened list. |
1103 |
|
|
1104 |
|
=over 4 |
1105 |
|
|
1106 |
|
=item objectNames |
1107 |
|
|
1108 |
|
List containing the names of the entity and relationship objects to be retrieved. |
1109 |
|
|
1110 |
|
=item filterClause |
1111 |
|
|
1112 |
|
WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can |
1113 |
|
be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form |
1114 |
|
B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the |
1115 |
|
parameter list as additional parameters. The fields in a filter clause can come from primary |
1116 |
|
entity relations, relationship relations, or secondary entity relations; however, all of the |
1117 |
|
entities and relationships involved must be included in the list of object names. |
1118 |
|
|
1119 |
|
=item parameterList |
1120 |
|
|
1121 |
|
List of the parameters to be substituted in for the parameter marks in the filter clause. |
1122 |
|
|
1123 |
|
=item field |
1124 |
|
|
1125 |
|
Name of the field to be used to get the elements of the list returned. |
1126 |
|
|
1127 |
|
=item RETURN |
1128 |
|
|
1129 |
|
Returns a list of values. |
1130 |
|
|
1131 |
|
=back |
1132 |
|
|
1133 |
|
=cut |
1134 |
|
#: Return Type @; |
1135 |
|
sub GetFlat {
    # Unpack the object names, the filter clause, the bind values, and the
    # name of the field to extract.
    my ($self, $objectNames, $filterClause, $parameterList, $field) = @_;
    # The values collected so far, in the order the records are fetched.
    my @values;
    # Run the query through Get.
    my $cursor = $self->Get($objectNames, $filterClause, $parameterList);
    # Pull records until Fetch returns a false value. Value is called in list
    # context, so every value it returns for the field is appended.
    for (my $record = $cursor->Fetch(); $record; $record = $cursor->Fetch()) {
        push @values, $record->Value($field);
    }
    # Hand back the flattened list.
    return @values;
}
1149 |
|
|
1150 |
=head3 Delete |
=head3 Delete |
1151 |
|
|
1152 |
C<< my $stats = $erdb->Delete($entityName, $objectID); >> |
C<< my $stats = $erdb->Delete($entityName, $objectID); >> |
1317 |
|
|
1318 |
=head3 GetList |
=head3 GetList |
1319 |
|
|
1320 |
C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >> |
C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, \@params); >> |
1321 |
|
|
1322 |
Return a list of object descriptors for the specified objects as determined by the |
Return a list of object descriptors for the specified objects as determined by the |
1323 |
specified filter clause. |
specified filter clause. |
1351 |
filter clause in general; however, odd things may happen if a sort field is from a secondary |
filter clause in general; however, odd things may happen if a sort field is from a secondary |
1352 |
relation. |
relation. |
1353 |
|
|
1354 |
=item param1, param2, ..., paramN |
=item params |
1355 |
|
|
1356 |
Parameter values to be substituted into the filter clause. |
Reference to a list of parameter values to be substituted into the filter clause. |
1357 |
|
|
1358 |
=item RETURN |
=item RETURN |
1359 |
|
|
1365 |
#: Return Type @% |
#: Return Type @% |
1366 |
sub GetList { |
sub GetList { |
1367 |
# Get the parameters. |
# Get the parameters. |
1368 |
my ($self, $objectNames, $filterClause, @params) = @_; |
my ($self, $objectNames, $filterClause, $params) = @_; |
1369 |
# Declare the return variable. |
# Declare the return variable. |
1370 |
my @retVal = (); |
my @retVal = (); |
1371 |
# Perform the query. |
# Perform the query. |
1372 |
my $query = $self->Get($objectNames, $filterClause, @params); |
my $query = $self->Get($objectNames, $filterClause, $params); |
1373 |
# Loop through the results. |
# Loop through the results. |
1374 |
while (my $object = $query->Fetch) { |
while (my $object = $query->Fetch) { |
1375 |
push @retVal, $object; |
push @retVal, $object; |
1378 |
return @retVal; |
return @retVal; |
1379 |
} |
} |
1380 |
|
|
1381 |
|
=head3 GetCount |
1382 |
|
|
1383 |
|
C<< my $count = $erdb->GetCount(\@objectNames, $filter, \@params); >> |
1384 |
|
|
1385 |
|
Return the number of rows found by a specified query. This method would |
1386 |
|
normally be used to count the records in a single table. For example, in a |
1387 |
|
genetics database |
1388 |
|
|
1389 |
|
my $count = $erdb->GetCount(['Genome'], 'Genome(genus-species) LIKE ?', ['homo %']); |
1390 |
|
|
1391 |
|
would return the number of genomes for the genus I<homo>. It is conceivable, however, |
1392 |
|
to use it to return records based on a join. For example, |
1393 |
|
|
1394 |
|
my $count = $erdb->GetCount(['HasFeature', 'Genome'], 'Genome(genus-species) LIKE ?', |
1395 |
|
['homo %']); |
1396 |
|
|
1397 |
|
would return the number of features for genomes in the genus I<homo>. Note that |
1398 |
|
only the rows from the first table are counted. If the above command were |
1399 |
|
|
1400 |
|
my $count = $erdb->GetCount(['Genome', 'Feature'], 'Genome(genus-species) LIKE ?', |
1401 |
|
['homo %']); |
1402 |
|
|
1403 |
|
it would return the number of genomes, not the number of genome/feature pairs. |
1404 |
|
|
1405 |
|
=over 4 |
1406 |
|
|
1407 |
|
=item objectNames |
1408 |
|
|
1409 |
|
Reference to a list of the objects (entities and relationships) included in the |
1410 |
|
query. |
1411 |
|
|
1412 |
|
=item filter |
1413 |
|
|
1414 |
|
A filter clause for restricting the query. The rules are the same as for the L</Get> |
1415 |
|
method. |
1416 |
|
|
1417 |
|
=item params |
1418 |
|
|
1419 |
|
Reference to a list of the parameter values to be substituted for the parameter marks |
1420 |
|
in the filter. |
1421 |
|
|
1422 |
|
=item RETURN |
1423 |
|
|
1424 |
|
Returns a count of the number of records in the first table that would satisfy |
1425 |
|
the query. |
1426 |
|
|
1427 |
|
=back |
1428 |
|
|
1429 |
|
=cut |
1430 |
|
|
1431 |
|
sub GetCount {
    # Unpack the arguments.
    my ($self, $objectNames, $filter, $params) = @_;
    # Pick the column to count. An entity is counted by its id. A relationship
    # is counted by its to-link, already spelled in SQL form (to_link rather
    # than to-link).
    my $countedField = ($self->IsEntity($objectNames->[0]) ? "id" : "to_link");
    # Build the FROM/WHERE portion of the statement. Only the suffix and the
    # mapped name list are needed here.
    my ($suffix, $mappedNames) = $self->_SetupSQL($objectNames, $filter);
    # Count the rows of the first object in the query.
    my $command = "SELECT COUNT(" . $mappedNames->[0] . "." . $countedField . ") " . $suffix;
    # Run the statement and pull out the single value it produces.
    my $sth = $self->_GetStatementHandle($command, $params);
    my ($count) = $sth->fetchrow_array();
    # A defined value is the answer.
    return $count if defined $count;
    # Otherwise report why there is no count: either the statement handle
    # recorded an SQL error, or the query simply produced no row.
    if ($sth->err) {
        Confess("Error retrieving row count: " . $sth->errstr());
    } else {
        Confess("No result attempting to retrieve row count.");
    }
    return $count;
}
1469 |
|
|
1470 |
=head3 ComputeObjectSentence |
=head3 ComputeObjectSentence |
1471 |
|
|
1472 |
C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >> |
C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >> |
1544 |
} |
} |
1545 |
} |
} |
1546 |
|
|
1547 |
|
=head3 InsertValue |
1548 |
|
|
1549 |
|
C<< $erdb->InsertValue($entityID, $fieldName, $value); >> |
1550 |
|
|
1551 |
|
This method will insert a new value into the database. The value must be one |
1552 |
|
associated with a secondary relation, since primary values cannot be inserted: |
1553 |
|
they occur exactly once. Secondary values, on the other hand, can be missing |
1554 |
|
or multiply-occurring. |
1555 |
|
|
1556 |
|
=over 4 |
1557 |
|
|
1558 |
|
=item entityID |
1559 |
|
|
1560 |
|
ID of the object that is to receive the new value. |
1561 |
|
|
1562 |
|
=item fieldName |
1563 |
|
|
1564 |
|
Field name for the new value-- this includes the entity name, since |
1565 |
|
field names are of the format I<objectName>C<(>I<fieldName>C<)>. |
1566 |
|
|
1567 |
|
=item value |
1568 |
|
|
1569 |
|
New value to be put in the field. |
1570 |
|
|
1571 |
|
=back |
1572 |
|
|
1573 |
|
=cut |
1574 |
|
|
1575 |
|
sub InsertValue {
    # Get the parameters: the ID of the owning entity instance, the field name
    # in objectName(fieldName) form, and the value to store.
    my ($self, $entityID, $fieldName, $value) = @_;
    # Parse the entity name and the real field name.
    if ($fieldName =~ /^([^(]+)\(([^)]+)\)/) {
        my $entityName = $1;
        my $fieldTitle = $2;
        # Get its descriptor.
        if (!$self->IsEntity($entityName)) {
            Confess("$entityName is not a valid entity.");
        } else {
            my $entityData = $self->{_metaData}->{Entities}->{$entityName};
            # Find the relation containing this field.
            my $fieldHash = $entityData->{Fields};
            if (! exists $fieldHash->{$fieldTitle}) {
                Confess("$fieldTitle not found in $entityName.");
            } else {
                my $relation = $fieldHash->{$fieldTitle}->{relation};
                # A field stored in the relation named after the entity is a
                # primary field. It occurs exactly once per instance and
                # cannot be inserted separately.
                if ($relation eq $entityName) {
                    Confess("Cannot do InsertValue on primary field $fieldTitle of $entityName.");
                } else {
                    # Now we can create an INSERT statement. Two values are
                    # bound, so two columns must be named: the owning entity's
                    # key (the id column that _SetupSQL joins secondary
                    # relations on) and the field itself in SQL form.
                    my $dbh = $self->{_dbh};
                    my $fixedName = _FixName($fieldTitle);
                    my $statement = "INSERT INTO $relation (id, $fixedName) VALUES(?, ?)";
                    # Execute the command.
                    $dbh->SQL($statement, 0, $entityID, $value);
                }
            }
        }
    } else {
        Confess("$fieldName is not a valid field name.");
    }
}
1609 |
|
|
1610 |
=head3 InsertObject |
=head3 InsertObject |
1611 |
|
|
1612 |
C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >> |
C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >> |
1925 |
# Get the parameters. |
# Get the parameters. |
1926 |
my ($self, $entityType, $ID) = @_; |
my ($self, $entityType, $ID) = @_; |
1927 |
# Create a query. |
# Create a query. |
1928 |
my $query = $self->Get([$entityType], "$entityType(id) = ?", $ID); |
my $query = $self->Get([$entityType], "$entityType(id) = ?", [$ID]); |
1929 |
# Get the first (and only) object. |
# Get the first (and only) object. |
1930 |
my $retVal = $query->Fetch(); |
my $retVal = $query->Fetch(); |
1931 |
# Return the result. |
# Return the result. |
2038 |
# list is a scalar we convert it into a singleton list. |
# list is a scalar we convert it into a singleton list. |
2039 |
my @parmList = (); |
my @parmList = (); |
2040 |
if (ref $parameterList eq "ARRAY") { |
if (ref $parameterList eq "ARRAY") { |
2041 |
|
Trace("GetAll parm list is an array.") if T(4); |
2042 |
@parmList = @{$parameterList}; |
@parmList = @{$parameterList}; |
2043 |
} else { |
} else { |
2044 |
|
Trace("GetAll parm list is a scalar: $parameterList.") if T(4); |
2045 |
push @parmList, $parameterList; |
push @parmList, $parameterList; |
2046 |
} |
} |
2047 |
# Insure the counter has a value. |
# Insure the counter has a value. |
2053 |
$filterClause .= " LIMIT $count"; |
$filterClause .= " LIMIT $count"; |
2054 |
} |
} |
2055 |
# Create the query. |
# Create the query. |
2056 |
my $query = $self->Get($objectNames, $filterClause, @parmList); |
my $query = $self->Get($objectNames, $filterClause, \@parmList); |
2057 |
# Set up a counter of the number of records read. |
# Set up a counter of the number of records read. |
2058 |
my $fetched = 0; |
my $fetched = 0; |
2059 |
# Loop through the records returned, extracting the fields. Note that if the |
# Loop through the records returned, extracting the fields. Note that if the |
2135 |
return $objectData->{Fields}; |
return $objectData->{Fields}; |
2136 |
} |
} |
2137 |
|
|
2138 |
|
=head2 Data Mining Methods |
2139 |
|
|
2140 |
=head3 GetUsefulCrossValues |
=head3 GetUsefulCrossValues |
2141 |
|
|
2142 |
C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >> |
C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >> |
2198 |
return @retVal; |
return @retVal; |
2199 |
} |
} |
2200 |
|
|
2201 |
|
=head3 FindColumn |
2202 |
|
|
2203 |
|
C<< my $colIndex = ERDB::FindColumn($headerLine, $columnIdentifier); >> |
2204 |
|
|
2205 |
|
Return the location of a desired column in a data mining header line. The data |
2206 |
|
mining header line is a tab-separated list of column names. The column |
2207 |
|
identifier is either the numerical index of a column or the actual column |
2208 |
|
name. |
2209 |
|
|
2210 |
|
=over 4 |
2211 |
|
|
2212 |
|
=item headerLine |
2213 |
|
|
2214 |
|
The header line from a data mining command, which consists of a tab-separated |
2215 |
|
list of column names. |
2216 |
|
|
2217 |
|
=item columnIdentifier |
2218 |
|
|
2219 |
|
Either the ordinal number of the desired column (1-based), or the name of the |
2220 |
|
desired column. |
2221 |
|
|
2222 |
|
=item RETURN |
2223 |
|
|
2224 |
|
Returns the array index (0-based) of the desired column. |
2225 |
|
|
2226 |
|
=back |
2227 |
|
|
2228 |
|
=cut |
2229 |
|
|
2230 |
|
sub FindColumn {
    # Get the parameters: the tab-separated header line and either a 1-based
    # column number or a column name.
    my ($headerLine, $columnIdentifier) = @_;
    # Declare the return variable.
    my $retVal;
    # Split the header line into column names.
    my @headers = ParseColumns($headerLine);
    # Determine whether we have a number or a name.
    if ($columnIdentifier =~ /^\d+$/) {
        # Here we have a number. Convert it from 1-based to 0-based and
        # insure it addresses one of the columns actually present.
        $retVal = $columnIdentifier - 1;
        if ($retVal < 0 || $retVal > $#headers) {
            Confess("Invalid column identifier \"$columnIdentifier\": value out of range.");
        }
    } else {
        # Here we have a name. Take the first column whose name matches exactly.
        for my $i (0 .. $#headers) {
            if ($headers[$i] eq $columnIdentifier) {
                $retVal = $i;
                last;
            }
        }
        if (! defined($retVal)) {
            Confess("Invalid column identifier \"$columnIdentifier\": value not found.");
        }
    }
    # Return the 0-based index.
    return $retVal;
}
2258 |
|
|
2259 |
|
=head3 ParseColumns |
2260 |
|
|
2261 |
|
C<< my @columns = ERDB::ParseColumns($line); >> |
2262 |
|
|
2263 |
|
Convert the specified data line to a list of columns. |
2264 |
|
|
2265 |
|
=over 4 |
2266 |
|
|
2267 |
|
=item line |
2268 |
|
|
2269 |
|
A data mining input, consisting of a tab-separated list of columns terminated by a |
2270 |
|
new-line. |
2271 |
|
|
2272 |
|
=item RETURN |
2273 |
|
|
2274 |
|
Returns a list consisting of the column values. |
2275 |
|
|
2276 |
|
=back |
2277 |
|
|
2278 |
|
=cut |
2279 |
|
|
2280 |
|
sub ParseColumns {
    # Take a private copy of the incoming data line.
    my $record = shift;
    # Strip the trailing line terminator, if there is one.
    chomp $record;
    # Break the record apart at the tabs. Trailing empty columns are dropped
    # by split.
    my @fields = split(/\t/, $record);
    # Hand back the columns.
    return @fields;
}
2290 |
|
|
2291 |
=head2 Internal Utility Methods |
=head2 Internal Utility Methods |
2292 |
|
|
2293 |
|
=head3 SetupSQL |
2294 |
|
|
2295 |
|
Process a list of object names and a filter clause so that they can be used to |
2296 |
|
build an SQL statement. This method takes in a reference to a list of object names |
2297 |
|
and a filter clause. It will return a corrected filter clause, a list of mapped |
2298 |
|
names and the mapped name hash. |
2299 |
|
|
2300 |
|
This is an instance method. |
2301 |
|
|
2302 |
|
=over 4 |
2303 |
|
|
2304 |
|
=item objectNames |
2305 |
|
|
2306 |
|
Reference to a list of the object names to be included in the query. |
2307 |
|
|
2308 |
|
=item filterClause |
2309 |
|
|
2310 |
|
A string containing the WHERE clause for the query (without the C<WHERE>) and also |
2311 |
|
optionally the C<ORDER BY> and C<LIMIT> clauses. |
2312 |
|
|
2313 |
|
=item RETURN |
2314 |
|
|
2315 |
|
Returns a three-element list. The first element is the SQL statement suffix, beginning |
2316 |
|
with the FROM clause. The second element is a reference to a list of the names to be |
2317 |
|
used in retrieving the fields. The third element is a hash mapping the names to the |
2318 |
|
objects they represent. |
2319 |
|
|
2320 |
|
=back |
2321 |
|
|
2322 |
|
=cut |
2323 |
|
|
2324 |
|
sub _SetupSQL {
    # Get the parameters: the list of object names in the query and the
    # filter clause (WHERE text without the WHERE, optionally followed by
    # ORDER BY / LIMIT).
    my ($self, $objectNames, $filterClause) = @_;
    # Adjust the list of object names to account for multiple occurrences of the
    # same object. We start with a hash table keyed on object name that will
    # return the object suffix. The first time an object is encountered it will
    # not be found in the hash. The next time the hash will map the object name
    # to 2, then 3, and so forth.
    my %objectHash = ();
    # This list will contain the object names as they are to appear in the
    # FROM list.
    my @fromList = ();
    # This list contains the suffixed object name for each object. It is exactly
    # parallel to the list in the $objectNames parameter.
    my @mappedNameList = ();
    # Finally, this hash translates from a mapped name to its original object name.
    my %mappedNameHash = ();
    # Now we create the lists. Note that for every single name we push something into
    # @fromList and @mappedNameList. This insures that those two arrays are exactly
    # parallel to $objectNames.
    for my $objectName (@{$objectNames}) {
        # Get the next suffix for this object.
        my $suffix = $objectHash{$objectName};
        if (! $suffix) {
            # Here we are seeing the object for the first time. The object name
            # is used as is.
            push @mappedNameList, $objectName;
            push @fromList, $objectName;
            $mappedNameHash{$objectName} = $objectName;
            # Denote the next suffix will be 2.
            $objectHash{$objectName} = 2;
        } else {
            # Here we've seen the object before. We construct a new name using
            # the suffix from the hash and update the hash.
            my $mappedName = "$objectName$suffix";
            $objectHash{$objectName} = $suffix + 1;
            # The FROM list has the object name followed by the mapped name. This
            # tells SQL it's still the same table, but we're using a different name
            # for it to avoid confusion.
            push @fromList, "$objectName $mappedName";
            # The mapped-name list contains the real mapped name.
            push @mappedNameList, $mappedName;
            # Finally, enable us to get back from the mapped name to the object name.
            $mappedNameHash{$mappedName} = $objectName;
        }
    }
    # Begin the SELECT suffix. It starts with
    #
    # FROM name1, name2, ... nameN
    #
    my $suffix = "FROM " . join(', ', @fromList);
    # This list collects the conditions for the WHERE clause: links between
    # primary and secondary relations, joins between consecutive objects, and
    # finally the caller's filter.
    my @joinWhere = ();
    # This is the working copy of the filter. It stays empty if there is no
    # filter clause.
    my $filterString = "";
    # Check for a filter clause.
    if ($filterClause) {
        # Here we have one, so we convert its field names to SQL form.
        $filterString = $filterClause;
        # We sort the object names by length. This helps protect us from finding
        # object names inside other object names when we're doing our search and replace.
        my @sortedNames = sort { length($b) - length($a) } @mappedNameList;
        # The final preparatory step is to create a hash table of relation names. The
        # table begins with the relation names already in the SELECT command. We may
        # need to add relations later if there is filtering on a field in a secondary
        # relation. The secondary relations are the ones that contain multiply-
        # occurring or optional fields.
        my %fromNames = map { $_ => 1 } @sortedNames;
        # We are ready to begin. We loop through the object names, replacing each
        # object name's field references by the corresponding SQL field reference.
        # Along the way, if we find a secondary relation, we will need to add it
        # to the FROM clause.
        for my $mappedName (@sortedNames) {
            # Get the length of the object name plus 2. This is the value we add to the
            # size of the field name to determine the size of the field reference as a
            # whole (the 2 accounts for the parentheses).
            my $nameLength = 2 + length $mappedName;
            # Get the real object name for this mapped name.
            my $objectName = $mappedNameHash{$mappedName};
            Trace("Processing $mappedName for object $objectName.") if T(4);
            # Get the object's field list.
            my $fieldList = $self->GetFieldTable($objectName);
            # Find the field references for this object. The name is quoted so it is
            # matched literally, and anchored at a word boundary so it cannot match
            # the tail of a longer name. The boundary is zero-width, so the position
            # arithmetic below is unaffected.
            while ($filterString =~ m/\b\Q$mappedName\E\(([^)]*)\)/g) {
                # At this point, $1 contains the field name, and the current position
                # is set immediately after the final parenthesis. We pull out the name of
                # the field and the position and length of the field reference as a whole.
                my $fieldName = $1;
                my $len = $nameLength + length $fieldName;
                my $pos = pos($filterString) - $len;
                # Insure the field exists.
                if (!exists $fieldList->{$fieldName}) {
                    Confess("Field $fieldName not found for object $objectName.");
                } else {
                    Trace("Processing $fieldName at position $pos.") if T(4);
                    # Get the field's relation.
                    my $relationName = $fieldList->{$fieldName}->{relation};
                    # The relation may be a secondary one. We need to insure it
                    # matches the mapped name of the primary relation. First we peel
                    # off the suffix from the mapped name.
                    my $mappingSuffix = substr $mappedName, length($objectName);
                    # Put the mapping suffix onto the relation name to get the
                    # mapped relation name.
                    my $mappedRelationName = "$relationName$mappingSuffix";
                    # Insure the relation is in the FROM clause.
                    if (!exists $fromNames{$mappedRelationName}) {
                        # Add the relation to the FROM clause and link it to its
                        # primary relation through the shared id column.
                        if ($mappedRelationName eq $relationName) {
                            # The name is un-mapped, so we add it without
                            # any frills.
                            $suffix .= ", $relationName";
                            push @joinWhere, "$objectName.id = $relationName.id";
                        } else {
                            # Here we have a mapping situation.
                            $suffix .= ", $relationName $mappedRelationName";
                            push @joinWhere, "$mappedRelationName.id = $mappedName.id";
                        }
                        # Denote we have this relation available for future fields.
                        $fromNames{$mappedRelationName} = 1;
                    }
                    # Form an SQL field reference from the relation name and the field name.
                    my $sqlReference = "$mappedRelationName." . _FixName($fieldName);
                    # Put it into the filter string in place of the old value.
                    substr($filterString, $pos, $len) = $sqlReference;
                    # Reposition the search to just past the text we inserted.
                    pos($filterString) = $pos + length $sqlReference;
                }
            }
        }
    }
    # The next step is to join the objects together. This is done whether or not
    # a filter clause was supplied; otherwise a multi-object query without a
    # filter would be an unjoined cross product. We start with the first object
    # and run through the objects after it. Note also that we make a safety copy
    # of the list before running through it.
    my @mappedObjectList = @mappedNameList;
    my $lastMappedObject = shift @mappedObjectList;
    # Get the join table.
    my $joinTable = $self->{_metaData}->{Joins};
    # Loop through the object list.
    for my $thisMappedObject (@mappedObjectList) {
        # Look for a join using the real object names.
        my $lastObject = $mappedNameHash{$lastMappedObject};
        my $thisObject = $mappedNameHash{$thisMappedObject};
        my $joinKey = "$lastObject/$thisObject";
        if (!exists $joinTable->{$joinKey}) {
            # Here there's no join, so we throw an error.
            Confess("No join exists to connect from $lastMappedObject to $thisMappedObject.");
        } else {
            # Get the join clause.
            my $unMappedJoin = $joinTable->{$joinKey};
            # Fix the names. Each name is matched literally and only as a whole
            # word, so that replacing "Feature" cannot alter the text of
            # "HasFeature" (or of a mapped name substituted a moment earlier).
            $unMappedJoin =~ s/\b\Q$lastObject\E\b/$lastMappedObject/;
            $unMappedJoin =~ s/\b\Q$thisObject\E\b/$thisMappedObject/;
            push @joinWhere, $unMappedJoin;
            # Save this object as the last object for the next iteration.
            $lastMappedObject = $thisMappedObject;
        }
    }
    # Now we need to handle the whole ORDER BY / LIMIT thing. The important part
    # here is we want the filter clause to be empty if there's no WHERE filter.
    # We'll put the ORDER BY / LIMIT clauses in the following variable.
    my $orderClause = "";
    # Locate the ORDER BY or LIMIT verbs (if any). We use a non-greedy
    # operator so that we find the first occurrence of either verb.
    if ($filterString =~ m/^(.*?)\s*(ORDER BY|LIMIT)/g) {
        # Here we have an ORDER BY or LIMIT verb. Split it off of the filter string.
        my $pos = pos $filterString;
        $orderClause = $2 . substr($filterString, $pos);
        $filterString = $1;
    }
    # Add the filter and the join clauses (if any) to the SELECT command. The
    # filter is parenthesized so it combines safely with the join conditions.
    if ($filterString) {
        Trace("Filter string is \"$filterString\".") if T(4);
        push @joinWhere, "($filterString)";
    }
    if (@joinWhere) {
        $suffix .= " WHERE " . join(' AND ', @joinWhere);
    }
    # Add the sort or limit clause (if any) to the SELECT command.
    if ($orderClause) {
        $suffix .= " $orderClause";
    }
    # Return the suffix, the mapped name list, and the mapped name hash.
    return ($suffix, \@mappedNameList, \%mappedNameHash);
}
2508 |
|
|
2509 |
|
=head3 GetStatementHandle |
2510 |
|
|
2511 |
|
This method will prepare and execute an SQL query, returning the statement handle. |
2512 |
|
The main reason for doing this here is so that everybody who does SQL queries gets |
2513 |
|
the benefit of tracing. |
2514 |
|
|
2515 |
|
This is an instance method. |
2516 |
|
|
2517 |
|
=over 4 |
2518 |
|
|
2519 |
|
=item command |
2520 |
|
|
2521 |
|
Command to prepare and execute. |
2522 |
|
|
2523 |
|
=item params |
2524 |
|
|
2525 |
|
Reference to a list of the values to be substituted in for the parameter marks. |
2526 |
|
|
2527 |
|
=item RETURN |
2528 |
|
|
2529 |
|
Returns a prepared and executed statement handle from which the caller can extract |
2530 |
|
results. |
2531 |
|
|
2532 |
|
=back |
2533 |
|
|
2534 |
|
=cut |
2535 |
|
|
2536 |
|
sub _GetStatementHandle {
    # Get the parameters: the SQL text and a reference to the list of values
    # for its parameter marks.
    my ($self, $command, $params) = @_;
    # A caller with no parameter marks may leave the list out entirely.
    # Treat that as an empty list instead of dereferencing an undefined value.
    my @parmValues = (defined $params ? @{$params} : ());
    # Trace the query.
    Trace("SQL query: $command") if T(SQL => 3);
    Trace("PARMS: '" . (join "', '", @parmValues) . "'") if (T(SQL => 4) && (@parmValues > 0));
    # Get the database handle.
    my $dbh = $self->{_dbh};
    # Prepare the command.
    my $sth = $dbh->prepare_command($command);
    # Execute it with the parameters bound in. A false return from execute
    # is reported along with the statement handle's error text.
    $sth->execute(@parmValues) || Confess("SELECT error: " . $sth->errstr());
    # Return the statement handle.
    return $sth;
}
2551 |
|
|
2552 |
=head3 GetLoadStats |
=head3 GetLoadStats |
2553 |
|
|
2554 |
Return a blank statistics object for use by the load methods. |
Return a blank statistics object for use by the load methods. |
3106 |
# Determine if this relationship has our entity in one of its link fields. |
# Determine if this relationship has our entity in one of its link fields. |
3107 |
my $fromEntity = $relationship->{from}; |
my $fromEntity = $relationship->{from}; |
3108 |
my $toEntity = $relationship->{to}; |
my $toEntity = $relationship->{to}; |
3109 |
Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(4); |
Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(Joins => 4); |
3110 |
if ($fromEntity eq $entityName) { |
if ($fromEntity eq $entityName) { |
3111 |
if ($toEntity eq $entityName) { |
if ($toEntity eq $entityName) { |
3112 |
# Here the relationship is recursive. |
# Here the relationship is recursive. |
3195 |
return $metadata; |
return $metadata; |
3196 |
} |
} |
3197 |
|
|
3198 |
|
=head3 SortNeeded |
3199 |
|
|
3200 |
|
C<< my $flag = $erdb->SortNeeded($relationName); >> |
3201 |
|
|
3202 |
|
Return TRUE if the specified relation should be sorted during loading to remove duplicate keys, |
3203 |
|
else FALSE. |
3204 |
|
|
3205 |
|
=over 4 |
3206 |
|
|
3207 |
|
=item relationName |
3208 |
|
|
3209 |
|
Name of the relation to be examined. |
3210 |
|
|
3211 |
|
=item RETURN |
3212 |
|
|
3213 |
|
Returns TRUE if the relation needs a sort, else FALSE. |
3214 |
|
|
3215 |
|
=back |
3216 |
|
|
3217 |
|
=cut |
3218 |
|
#: Return Type $; |
3219 |
|
sub SortNeeded {
    # Unpack the arguments.
    my ($self, $relationName) = @_;
    # Look the relation up in the entity table. Only a relation that carries
    # an entity's own name (its primary relation) appears there.
    my $entities = $self->{_metaData}->{Entities};
    # Anything that is not a primary entity relation never needs the sort.
    return 0 if ! exists $entities->{$relationName};
    # Find out what kind of key the entity uses.
    my $keyType = $entities->{$relationName}->{keyType};
    Trace("Relation $relationName found in entity table with key type $keyType.") if T(3);
    # Every key type except hash-string requires the sort.
    return ($keyType ne 'hash-string' ? 1 : 0);
}
3237 |
|
|
3238 |
=head3 CreateRelationshipIndex |
=head3 CreateRelationshipIndex |
3239 |
|
|
3240 |
Create an index for a relationship's relation. |
Create an index for a relationship's relation. |