9 |
use DBObject; |
use DBObject; |
10 |
use Stats; |
use Stats; |
11 |
use Time::HiRes qw(gettimeofday); |
use Time::HiRes qw(gettimeofday); |
12 |
|
use FIG; |
13 |
|
|
14 |
=head1 Entity-Relationship Database Package |
=head1 Entity-Relationship Database Package |
15 |
|
|
301 |
# Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string. |
# Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string. |
302 |
# "maxLen" is the maximum permissible length of the incoming string data used to populate a field |
# "maxLen" is the maximum permissible length of the incoming string data used to populate a field |
303 |
# of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation |
# of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation |
304 |
#string is specified in the field definition. |
# string is specified in the field definition. "avgLen" is the average byte length for estimating |
305 |
my %TypeTable = ( char => { sqlType => 'CHAR(1)', maxLen => 1, dataGen => "StringGen('A')" }, |
# record sizes. |
306 |
int => { sqlType => 'INTEGER', maxLen => 20, dataGen => "IntGen(0, 99999999)" }, |
my %TypeTable = ( char => { sqlType => 'CHAR(1)', maxLen => 1, avgLen => 1, dataGen => "StringGen('A')" }, |
307 |
string => { sqlType => 'VARCHAR(255)', maxLen => 255, dataGen => "StringGen(IntGen(10,250))" }, |
int => { sqlType => 'INTEGER', maxLen => 20, avgLen => 4, dataGen => "IntGen(0, 99999999)" }, |
308 |
text => { sqlType => 'TEXT', maxLen => 1000000000, dataGen => "StringGen(IntGen(80,1000))" }, |
string => { sqlType => 'VARCHAR(255)', maxLen => 255, avgLen => 100, dataGen => "StringGen(IntGen(10,250))" }, |
309 |
date => { sqlType => 'BIGINT', maxLen => 80, dataGen => "DateGen(-7, 7, IntGen(0,1400))" }, |
text => { sqlType => 'TEXT', maxLen => 1000000000, avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" }, |
310 |
float => { sqlType => 'DOUBLE PRECISION', maxLen => 40, dataGen => "FloatGen(0.0, 100.0)" }, |
date => { sqlType => 'BIGINT', maxLen => 80, avgLen => 8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" }, |
311 |
boolean => { sqlType => 'SMALLINT', maxLen => 1, dataGen => "IntGen(0, 1)" }, |
float => { sqlType => 'DOUBLE PRECISION', maxLen => 40, avgLen => 8, dataGen => "FloatGen(0.0, 100.0)" }, |
312 |
|
boolean => { sqlType => 'SMALLINT', maxLen => 1, avgLen => 2, dataGen => "IntGen(0, 1)" }, |
313 |
'key-string' => |
'key-string' => |
314 |
{ sqlType => 'VARCHAR(40)', maxLen => 40, dataGen => "StringGen(IntGen(10,40))" }, |
{ sqlType => 'VARCHAR(40)', maxLen => 40, avgLen => 10, dataGen => "StringGen(IntGen(10,40))" }, |
315 |
'name-string' => |
'name-string' => |
316 |
{ sqlType => 'VARCHAR(80)', maxLen => 80, dataGen => "StringGen(IntGen(10,80))" }, |
{ sqlType => 'VARCHAR(80)', maxLen => 80, avgLen => 40, dataGen => "StringGen(IntGen(10,80))" }, |
317 |
'medium-string' => |
'medium-string' => |
318 |
{ sqlType => 'VARCHAR(160)', maxLen => 160, dataGen => "StringGen(IntGen(10,160))" }, |
{ sqlType => 'VARCHAR(160)', maxLen => 160, avgLen => 40, dataGen => "StringGen(IntGen(10,160))" }, |
319 |
); |
); |
320 |
|
|
321 |
# Table translating arities into natural language. |
# Table translating arities into natural language. |
371 |
|
|
372 |
=head3 ShowMetaData |
=head3 ShowMetaData |
373 |
|
|
374 |
C<< $database->ShowMetaData($fileName); >> |
C<< $erdb->ShowMetaData($fileName); >> |
375 |
|
|
376 |
This method outputs a description of the database. This description can be used to help users create |
This method outputs a description of the database. This description can be used to help users create |
377 |
the data to be loaded into the relations. |
the data to be loaded into the relations. |
526 |
|
|
527 |
=head3 DumpMetaData |
=head3 DumpMetaData |
528 |
|
|
529 |
C<< $database->DumpMetaData(); >> |
C<< $erdb->DumpMetaData(); >> |
530 |
|
|
531 |
Return a dump of the metadata structure. |
Return a dump of the metadata structure. |
532 |
|
|
541 |
|
|
542 |
=head3 CreateTables |
=head3 CreateTables |
543 |
|
|
544 |
C<< $datanase->CreateTables(); >> |
C<< $erdb->CreateTables(); >> |
545 |
|
|
546 |
This method creates the tables for the database from the metadata structure loaded by the |
This method creates the tables for the database from the metadata structure loaded by the |
547 |
constructor. It is expected this function will only be used on rare occasions, when the |
constructor. It is expected this function will only be used on rare occasions, when the |
553 |
sub CreateTables { |
sub CreateTables { |
554 |
# Get the parameters. |
# Get the parameters. |
555 |
my ($self) = @_; |
my ($self) = @_; |
556 |
my $metadata = $self->{_metaData}; |
# Get the relation names. |
557 |
my $dbh = $self->{_dbh}; |
my @relNames = $self->GetTableNames(); |
558 |
# Loop through the entities. |
# Loop through the relations. |
559 |
my $entityHash = $metadata->{Entities}; |
for my $relationName (@relNames) { |
|
for my $entityName (keys %{$entityHash}) { |
|
|
my $entityData = $entityHash->{$entityName}; |
|
|
# Tell the user what we're doing. |
|
|
Trace("Creating relations for entity $entityName.") if T(1); |
|
|
# Loop through the entity's relations. |
|
|
for my $relationName (keys %{$entityData->{Relations}}) { |
|
560 |
# Create a table for this relation. |
# Create a table for this relation. |
561 |
$self->CreateTable($relationName); |
$self->CreateTable($relationName); |
562 |
Trace("Relation $relationName created.") if T(1); |
Trace("Relation $relationName created.") if T(2); |
|
} |
|
|
} |
|
|
# Loop through the relationships. |
|
|
my $relationshipTable = $metadata->{Relationships}; |
|
|
for my $relationshipName (keys %{$metadata->{Relationships}}) { |
|
|
# Create a table for this relationship. |
|
|
Trace("Creating relationship $relationshipName.") if T(1); |
|
|
$self->CreateTable($relationshipName); |
|
563 |
} |
} |
564 |
} |
} |
565 |
|
|
566 |
=head3 CreateTable |
=head3 CreateTable |
567 |
|
|
568 |
C<< $database->CreateTable($tableName, $indexFlag); >> |
C<< $erdb->CreateTable($tableName, $indexFlag, $estimatedRows); >> |
569 |
|
|
570 |
Create the table for a relation and optionally create its indexes. |
Create the table for a relation and optionally create its indexes. |
571 |
|
|
575 |
|
|
576 |
Name of the relation (which will also be the table name). |
Name of the relation (which will also be the table name). |
577 |
|
|
578 |
=item $indexFlag |
=item indexFlag |
579 |
|
|
580 |
TRUE if the indexes for the relation should be created, else FALSE. If FALSE, |
TRUE if the indexes for the relation should be created, else FALSE. If FALSE, |
581 |
L</CreateIndexes> must be called later to bring the indexes into existence. |
L</CreateIndexes> must be called later to bring the indexes into existence. |
582 |
|
|
583 |
|
=item estimatedRows (optional) |
584 |
|
|
585 |
|
If specified, the estimated maximum number of rows for the relation. This |
586 |
|
information allows the creation of tables using storage engines that are |
587 |
|
faster but require size estimates, such as MyISAM. |
588 |
|
|
589 |
=back |
=back |
590 |
|
|
591 |
=cut |
=cut |
592 |
|
|
593 |
sub CreateTable { |
sub CreateTable { |
594 |
# Get the parameters. |
# Get the parameters. |
595 |
my ($self, $relationName, $indexFlag) = @_; |
my ($self, $relationName, $indexFlag, $estimatedRows) = @_; |
596 |
# Get the database handle. |
# Get the database handle. |
597 |
my $dbh = $self->{_dbh}; |
my $dbh = $self->{_dbh}; |
598 |
# Get the relation data and determine whether or not the relation is primary. |
# Get the relation data and determine whether or not the relation is primary. |
616 |
# Insure the table is not already there. |
# Insure the table is not already there. |
617 |
$dbh->drop_table(tbl => $relationName); |
$dbh->drop_table(tbl => $relationName); |
618 |
Trace("Table $relationName dropped.") if T(2); |
Trace("Table $relationName dropped.") if T(2); |
619 |
|
# If there are estimated rows, create an estimate so we can take advantage of |
620 |
|
# faster DB technologies. |
621 |
|
my $estimation = undef; |
622 |
|
if ($estimatedRows) { |
623 |
|
$estimation = [$self->EstimateRowSize($relationName), $estimatedRows]; |
624 |
|
} |
625 |
# Create the table. |
# Create the table. |
626 |
Trace("Creating table $relationName: $fieldThing") if T(2); |
Trace("Creating table $relationName: $fieldThing") if T(2); |
627 |
$dbh->create_table(tbl => $relationName, flds => $fieldThing); |
$dbh->create_table(tbl => $relationName, flds => $fieldThing, estimates => $estimation); |
628 |
Trace("Relation $relationName created in database.") if T(2); |
Trace("Relation $relationName created in database.") if T(2); |
629 |
# If we want to build the indexes, we do it here. |
# If we want to build the indexes, we do it here. |
630 |
if ($indexFlag) { |
if ($indexFlag) { |
634 |
|
|
635 |
=head3 CreateIndex |
=head3 CreateIndex |
636 |
|
|
637 |
C<< $database->CreateIndex($relationName); >> |
C<< $erdb->CreateIndex($relationName); >> |
638 |
|
|
639 |
Create the indexes for a relation. If a table is being loaded from a large source file (as |
Create the indexes for a relation. If a table is being loaded from a large source file (as |
640 |
is the case in L</LoadTable>), it is sometimes best to create the indexes after the load. |
is the case in L</LoadTable>), it is sometimes best to create the indexes after the load. |
660 |
# Get the index's uniqueness flag. |
# Get the index's uniqueness flag. |
661 |
my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false'); |
my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false'); |
662 |
# Create the index. |
# Create the index. |
663 |
$dbh->create_index(idx => $indexName, tbl => $relationName, flds => $flds, unique => $unique); |
my $rv = $dbh->create_index(idx => $indexName, tbl => $relationName, |
664 |
|
flds => $flds, unique => $unique); |
665 |
|
if ($rv) { |
666 |
Trace("Index created: $indexName for $relationName ($flds)") if T(1); |
Trace("Index created: $indexName for $relationName ($flds)") if T(1); |
667 |
|
} else { |
668 |
|
Confess("Error creating index $indexName for $relationName using ($flds): " . $dbh->error_message()); |
669 |
|
} |
670 |
} |
} |
671 |
} |
} |
672 |
|
|
673 |
=head3 LoadTables |
=head3 LoadTables |
674 |
|
|
675 |
C<< my $stats = $database->LoadTables($directoryName, $rebuild); >> |
C<< my $stats = $erdb->LoadTables($directoryName, $rebuild); >> |
676 |
|
|
677 |
This method will load the database tables from a directory. The tables must already have been created |
This method will load the database tables from a directory. The tables must already have been created |
678 |
in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name; |
in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name; |
715 |
$directoryName =~ s!/\\$!!; |
$directoryName =~ s!/\\$!!; |
716 |
# Declare the return variable. |
# Declare the return variable. |
717 |
my $retVal = Stats->new(); |
my $retVal = Stats->new(); |
718 |
# Get the metadata structure. |
# Get the relation names. |
719 |
my $metaData = $self->{_metaData}; |
my @relNames = $self->GetTableNames(); |
720 |
# Loop through the entities. |
for my $relationName (@relNames) { |
|
for my $entity (values %{$metaData->{Entities}}) { |
|
|
# Loop through the entity's relations. |
|
|
for my $relationName (keys %{$entity->{Relations}}) { |
|
721 |
# Try to load this relation. |
# Try to load this relation. |
722 |
my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild); |
my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild); |
723 |
# Accumulate the statistics. |
# Accumulate the statistics. |
724 |
$retVal->Accumulate($result); |
$retVal->Accumulate($result); |
725 |
} |
} |
|
} |
|
|
# Loop through the relationships. |
|
|
for my $relationshipName (keys %{$metaData->{Relationships}}) { |
|
|
# Try to load this relationship's relation. |
|
|
my $result = $self->_LoadRelation($directoryName, $relationshipName, $rebuild); |
|
|
# Accumulate the statistics. |
|
|
$retVal->Accumulate($result); |
|
|
} |
|
726 |
# Add the duration of the load to the statistical object. |
# Add the duration of the load to the statistical object. |
727 |
$retVal->Add('duration', gettimeofday - $startTime); |
$retVal->Add('duration', gettimeofday - $startTime); |
728 |
# Return the accumulated statistics. |
# Return the accumulated statistics. |
729 |
return $retVal; |
return $retVal; |
730 |
} |
} |
731 |
|
|
732 |
|
|
733 |
=head3 GetTableNames |
=head3 GetTableNames |
734 |
|
|
735 |
C<< my @names = $database->GetTableNames; >> |
C<< my @names = $erdb->GetTableNames; >> |
736 |
|
|
737 |
Return a list of the relations required to implement this database. |
Return a list of the relations required to implement this database. |
738 |
|
|
749 |
|
|
750 |
=head3 GetEntityTypes |
=head3 GetEntityTypes |
751 |
|
|
752 |
C<< my @names = $database->GetEntityTypes; >> |
C<< my @names = $erdb->GetEntityTypes; >> |
753 |
|
|
754 |
Return a list of the entity type names. |
Return a list of the entity type names. |
755 |
|
|
764 |
return sort keys %{$entityList}; |
return sort keys %{$entityList}; |
765 |
} |
} |
766 |
|
|
767 |
|
=head3 IsEntity |
768 |
|
|
769 |
|
C<< my $flag = $erdb->IsEntity($entityName); >> |
770 |
|
|
771 |
|
Return TRUE if the parameter is an entity name, else FALSE. |
772 |
|
|
773 |
|
=over 4 |
774 |
|
|
775 |
|
=item entityName |
776 |
|
|
777 |
|
Object name to be tested. |
778 |
|
|
779 |
|
=item RETURN |
780 |
|
|
781 |
|
Returns TRUE if the specified string is an entity name, else FALSE. |
782 |
|
|
783 |
|
=back |
784 |
|
|
785 |
|
=cut |
786 |
|
|
787 |
|
sub IsEntity { |
788 |
|
# Get the parameters. |
789 |
|
my ($self, $entityName) = @_; |
790 |
|
# Test to see if it's an entity. |
791 |
|
return exists $self->{_metaData}->{Entities}->{$entityName}; |
792 |
|
} |
793 |
|
|
794 |
=head3 Get |
=head3 Get |
795 |
|
|
796 |
C<< my $query = $database->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >> |
C<< my $query = $erdb->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >> |
797 |
|
|
798 |
This method returns a query object for entities of a specified type using a specified filter. |
This method returns a query object for entities of a specified type using a specified filter. |
799 |
The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each |
The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each |
801 |
following call requests all B<Genome> objects for the genus specified in the variable |
following call requests all B<Genome> objects for the genus specified in the variable |
802 |
$genus. |
$genus. |
803 |
|
|
804 |
C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", $genus); >> |
C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", $genus); >> |
805 |
|
|
806 |
The WHERE clause contains a single question mark, so there is a single additional |
The WHERE clause contains a single question mark, so there is a single additional |
807 |
parameter representing the parameter value. It would also be possible to code |
parameter representing the parameter value. It would also be possible to code |
808 |
|
|
809 |
C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >> |
C<< $query = $erdb->Get(['Genome'], "Genome(genus) = \'$genus\'"); >> |
810 |
|
|
811 |
however, this version of the call would generate a syntax error if there were any quote |
however, this version of the call would generate a syntax error if there were any quote |
812 |
characters inside the variable C<$genus>. |
characters inside the variable C<$genus>. |
818 |
It is possible to specify multiple entity and relationship names in order to retrieve more than |
It is possible to specify multiple entity and relationship names in order to retrieve more than |
819 |
one object's data at the same time, which allows highly complex joined queries. For example, |
one object's data at the same time, which allows highly complex joined queries. For example, |
820 |
|
|
821 |
C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >> |
C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >> |
822 |
|
|
823 |
If multiple names are specified, then the query processor will automatically determine a |
If multiple names are specified, then the query processor will automatically determine a |
824 |
join path between the entities and relationships. The algorithm used is very simplistic. |
join path between the entities and relationships. The algorithm used is very simplistic. |
981 |
$command .= " ORDER BY $orderClause"; |
$command .= " ORDER BY $orderClause"; |
982 |
} |
} |
983 |
} |
} |
984 |
Trace("SQL query: $command") if T(2); |
Trace("SQL query: $command") if T(3); |
985 |
Trace("PARMS: '" . (join "', '", @params) . "'") if (T(3) && (@params > 0)); |
Trace("PARMS: '" . (join "', '", @params) . "'") if (T(4) && (@params > 0)); |
986 |
my $sth = $dbh->prepare_command($command); |
my $sth = $dbh->prepare_command($command); |
987 |
# Execute it with the parameters bound in. |
# Execute it with the parameters bound in. |
988 |
$sth->execute(@params) || Confess("SELECT error" . $sth->errstr()); |
$sth->execute(@params) || Confess("SELECT error" . $sth->errstr()); |
993 |
|
|
994 |
=head3 GetList |
=head3 GetList |
995 |
|
|
996 |
C<< my @dbObjects = $database->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >> |
C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >> |
997 |
|
|
998 |
Return a list of object descriptors for the specified objects as determined by the |
Return a list of object descriptors for the specified objects as determined by the |
999 |
specified filter clause. |
specified filter clause. |
1056 |
|
|
1057 |
=head3 ComputeObjectSentence |
=head3 ComputeObjectSentence |
1058 |
|
|
1059 |
C<< my $sentence = $database->ComputeObjectSentence($objectName); >> |
C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >> |
1060 |
|
|
1061 |
Check an object name, and if it is a relationship convert it to a relationship sentence. |
Check an object name, and if it is a relationship convert it to a relationship sentence. |
1062 |
|
|
1091 |
|
|
1092 |
=head3 DumpRelations |
=head3 DumpRelations |
1093 |
|
|
1094 |
C<< $database->DumpRelations($outputDirectory); >> |
C<< $erdb->DumpRelations($outputDirectory); >> |
1095 |
|
|
1096 |
Write the contents of all the relations to tab-delimited files in the specified directory. |
Write the contents of all the relations to tab-delimited files in the specified directory. |
1097 |
Each file will have the same name as the relation dumped, with an extension of DTX. |
Each file will have the same name as the relation dumped, with an extension of DTX. |
1133 |
|
|
1134 |
=head3 InsertObject |
=head3 InsertObject |
1135 |
|
|
1136 |
C<< my $ok = $database->InsertObject($objectType, \%fieldHash); >> |
C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >> |
1137 |
|
|
1138 |
Insert an object into the database. The object is defined by a type name and then a hash |
Insert an object into the database. The object is defined by a type name and then a hash |
1139 |
of field names to values. Field values in the primary relation are represented by scalars. |
of field names to values. Field values in the primary relation are represented by scalars. |
1142 |
example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases |
example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases |
1143 |
C<ZP_00210270.1> and C<gi|46206278>. |
C<ZP_00210270.1> and C<gi|46206278>. |
1144 |
|
|
1145 |
C<< $database->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >> |
C<< $erdb->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >> |
1146 |
|
|
1147 |
The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and |
The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and |
1148 |
property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>. |
property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>. |
1149 |
|
|
1150 |
C<< $database->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >> |
C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >> |
1151 |
|
|
1152 |
=over 4 |
=over 4 |
1153 |
|
|
1272 |
|
|
1273 |
=head3 LoadTable |
=head3 LoadTable |
1274 |
|
|
1275 |
C<< my %results = $database->LoadTable($fileName, $relationName, $truncateFlag); >> |
C<< my %results = $erdb->LoadTable($fileName, $relationName, $truncateFlag); >> |
1276 |
|
|
1277 |
Load data from a tab-delimited file into a specified table, optionally re-creating the table |
Load data from a tab-delimited file into a specified table, optionally re-creating the table |
1278 |
first. |
first. |
1308 |
Trace("Loading table $relationName from $fileName") if T(2); |
Trace("Loading table $relationName from $fileName") if T(2); |
1309 |
# Get the database handle. |
# Get the database handle. |
1310 |
my $dbh = $self->{_dbh}; |
my $dbh = $self->{_dbh}; |
1311 |
|
# Get the input file size. |
1312 |
|
my $fileSize = -s $fileName; |
1313 |
# Get the relation data. |
# Get the relation data. |
1314 |
my $relation = $self->_FindRelation($relationName); |
my $relation = $self->_FindRelation($relationName); |
1315 |
# Check the truncation flag. |
# Check the truncation flag. |
1316 |
if ($truncateFlag) { |
if ($truncateFlag) { |
1317 |
Trace("Creating table $relationName") if T(2); |
Trace("Creating table $relationName") if T(2); |
1318 |
|
# Compute the row count estimate. We take the size of the load file, |
1319 |
|
# divide it by the estimated row size, and then multiply by 1.5 to |
1320 |
|
# leave extra room. We postulate a minimum row count of 1000 to |
1321 |
|
# prevent problems with incoming empty load files. |
1322 |
|
my $rowSize = $self->EstimateRowSize($relationName); |
1323 |
|
my $estimate = FIG::max($fileSize * 1.5 / $rowSize, 1000); |
1324 |
# Re-create the table without its index. |
# Re-create the table without its index. |
1325 |
$self->CreateTable($relationName, 0); |
$self->CreateTable($relationName, 0, $estimate); |
1326 |
# If this is a pre-index DBMS, create the index here. |
# If this is a pre-index DBMS, create the index here. |
1327 |
if ($dbh->{_preIndex}) { |
if ($dbh->{_preIndex}) { |
1328 |
eval { |
eval { |
1333 |
} |
} |
1334 |
} |
} |
1335 |
} |
} |
|
# Determine whether or not this is a primary relation. Primary relations have an extra |
|
|
# field indicating whether or not a given object is new or was loaded from the flat files. |
|
|
my $primary = $self->_IsPrimary($relationName); |
|
|
# Get the number of fields in this relation. |
|
|
my @fieldList = @{$relation->{Fields}}; |
|
|
my $fieldCount = @fieldList; |
|
|
# Start a database transaction. |
|
|
$dbh->begin_tran; |
|
|
# Open the relation file. We need to create a cleaned-up copy before loading. |
|
|
open TABLEIN, '<', $fileName; |
|
|
my $tempName = "$fileName.tbl"; |
|
|
open TABLEOUT, '>', $tempName; |
|
|
my $inputCount = 0; |
|
|
# Loop through the file. |
|
|
while (<TABLEIN>) { |
|
|
$inputCount++; |
|
|
# Chop off the new-line character. |
|
|
my $record = Tracer::Strip($_); |
|
|
# Only proceed if the record is non-blank. |
|
|
if ($record) { |
|
|
# Escape all the backslashes found in the line. |
|
|
$record =~ s/\\/\\\\/g; |
|
|
# Insure the number of fields is correct. |
|
|
my @fields = split /\t/, $record; |
|
|
while (@fields > $fieldCount) { |
|
|
my $extraField = $fields[$#fields]; |
|
|
delete $fields[$#fields]; |
|
|
if ($extraField) { |
|
|
Trace("Nonblank extra field value \"$extraField\" deleted from record $inputCount of $fileName.") if T(1); |
|
|
} |
|
|
} |
|
|
while (@fields < $fieldCount) { |
|
|
push @fields, ""; |
|
|
} |
|
|
# If this is a primary relation, add a 0 for the new-record flag (indicating that |
|
|
# this record is not new, but part of the original load). |
|
|
if ($primary) { |
|
|
push @fields, "0"; |
|
|
} |
|
|
# Write the record. |
|
|
$record = join "\t", @fields; |
|
|
print TABLEOUT "$record\n"; |
|
|
# Count the record written. |
|
|
my $count = $retVal->Add('records'); |
|
|
my $len = length $record; |
|
|
Trace("Record $count written with $len characters.") if T(4); |
|
|
} else { |
|
|
# Here we have a blank record. |
|
|
$retVal->Add('skipped'); |
|
|
} |
|
|
} |
|
|
# Close the files. |
|
|
close TABLEIN; |
|
|
close TABLEOUT; |
|
|
Trace("Temporary file $tempName created.") if T(2); |
|
1336 |
# Load the table. |
# Load the table. |
1337 |
my $rv; |
my $rv; |
1338 |
eval { |
eval { |
1339 |
$rv = $dbh->load_table(file => $tempName, tbl => $relationName); |
$rv = $dbh->load_table(file => $fileName, tbl => $relationName); |
1340 |
}; |
}; |
1341 |
if (!defined $rv) { |
if (!defined $rv) { |
1342 |
$retVal->AddMessage($@) if ($@); |
$retVal->AddMessage($@) if ($@); |
1343 |
$retVal->AddMessage("Table load failed for $relationName using $tempName."); |
$retVal->AddMessage("Table load failed for $relationName using $fileName."); |
1344 |
Trace("Table load failed for $relationName.") if T(1); |
Trace("Table load failed for $relationName.") if T(1); |
1345 |
} else { |
} else { |
1346 |
# Here we successfully loaded the table. Trace the number of records loaded. |
# Here we successfully loaded the table. |
1347 |
Trace("$retVal->{records} records read for $relationName.") if T(2); |
$retVal->Add("tables"); |
1348 |
|
my $size = -s $fileName; |
1349 |
|
Trace("$size bytes loaded into $relationName.") if T(2); |
1350 |
# If we're rebuilding, we need to create the table indexes. |
# If we're rebuilding, we need to create the table indexes. |
1351 |
if ($truncateFlag && ! $dbh->{_preIndex}) { |
if ($truncateFlag && ! $dbh->{_preIndex}) { |
1352 |
eval { |
eval { |
1356 |
$retVal->AddMessage($@); |
$retVal->AddMessage($@); |
1357 |
} |
} |
1358 |
} |
} |
|
# Analyze the table to help optimize tables. |
|
1359 |
} |
} |
1360 |
# Commit the database changes. |
# Analyze the table to improve performance. |
|
$dbh->commit_tran; |
|
1361 |
$dbh->vacuum_it($relationName); |
$dbh->vacuum_it($relationName); |
|
# Delete the temporary file. |
|
|
unlink $tempName; |
|
1362 |
# Return the statistics. |
# Return the statistics. |
1363 |
return $retVal; |
return $retVal; |
1364 |
} |
} |
1365 |
|
|
1366 |
=head3 GenerateEntity |
=head3 GenerateEntity |
1367 |
|
|
1368 |
C<< my $fieldHash = $database->GenerateEntity($id, $type, \%values); >> |
C<< my $fieldHash = $erdb->GenerateEntity($id, $type, \%values); >> |
1369 |
|
|
1370 |
Generate the data for a new entity instance. This method creates a field hash suitable for |
Generate the data for a new entity instance. This method creates a field hash suitable for |
1371 |
passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest |
passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest |
1423 |
|
|
1424 |
=head3 GetEntity |
=head3 GetEntity |
1425 |
|
|
1426 |
C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >> |
C<< my $entityObject = $erdb->GetEntity($entityType, $ID); >> |
1427 |
|
|
1428 |
Return an object describing the entity instance with a specified ID. |
Return an object describing the entity instance with a specified ID. |
1429 |
|
|
1459 |
|
|
1460 |
=head3 GetEntityValues |
=head3 GetEntityValues |
1461 |
|
|
1462 |
C<< my @values = GetEntityValues($entityType, $ID, \@fields); >> |
C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >> |
1463 |
|
|
1464 |
Return a list of values from a specified entity instance. |
Return a list of values from a specified entity instance. |
1465 |
|
|
1502 |
|
|
1503 |
=head3 GetAll |
=head3 GetAll |
1504 |
|
|
1505 |
C<< my @list = $sprout->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >> |
C<< my @list = $erdb->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >> |
1506 |
|
|
1507 |
Return a list of values taken from the objects returned by a query. The first three |
Return a list of values taken from the objects returned by a query. The first three |
1508 |
parameters correspond to the parameters of the L</Get> method. The final parameter is |
parameters correspond to the parameters of the L</Get> method. The final parameter is |
1518 |
spreadsheet cell, and each feature will be represented by a list containing the |
spreadsheet cell, and each feature will be represented by a list containing the |
1519 |
feature ID followed by all of its aliases. |
feature ID followed by all of its aliases. |
1520 |
|
|
1521 |
C<< $query = $sprout->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >> |
C<< $query = $erdb->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >> |
1522 |
|
|
1523 |
=over 4 |
=over 4 |
1524 |
|
|
1587 |
return @retVal; |
return @retVal; |
1588 |
} |
} |
1589 |
|
|
1590 |
|
=head3 EstimateRowSize |
1591 |
|
|
1592 |
|
C<< my $rowSize = $erdb->EstimateRowSize($relName); >> |
1593 |
|
|
1594 |
|
Estimate the row size of the specified relation. The estimated row size is computed by adding |
1595 |
|
up the average length for each data type. |
1596 |
|
|
1597 |
|
=over 4 |
1598 |
|
|
1599 |
|
=item relName |
1600 |
|
|
1601 |
|
Name of the relation whose estimated row size is desired. |
1602 |
|
|
1603 |
|
=item RETURN |
1604 |
|
|
1605 |
|
Returns an estimate of the row size for the specified relation. |
1606 |
|
|
1607 |
|
=back |
1608 |
|
|
1609 |
|
=cut |
1610 |
|
#: Return Type $; |
1611 |
|
sub EstimateRowSize { |
1612 |
|
# Get the parameters. |
1613 |
|
my ($self, $relName) = @_; |
1614 |
|
# Declare the return variable. |
1615 |
|
my $retVal = 0; |
1616 |
|
# Find the relation descriptor. |
1617 |
|
my $relation = $self->_FindRelation($relName); |
1618 |
|
# Get the list of fields. |
1619 |
|
for my $fieldData (@{$relation->{Fields}}) { |
1620 |
|
# Get the field type and add its length. |
1621 |
|
my $fieldLen = $TypeTable{$fieldData->{type}}->{avgLen}; |
1622 |
|
$retVal += $fieldLen; |
1623 |
|
} |
1624 |
|
# Return the result. |
1625 |
|
return $retVal; |
1626 |
|
} |
1627 |
|
|
1628 |
=head2 Internal Utility Methods |
=head2 Internal Utility Methods |
1629 |
|
|
1630 |
=head3 GetLoadStats |
=head3 GetLoadStats |
2207 |
my @fromList = (); |
my @fromList = (); |
2208 |
my @toList = (); |
my @toList = (); |
2209 |
my @bothList = (); |
my @bothList = (); |
2210 |
Trace("Join table build for $entityName.") if T(3); |
Trace("Join table build for $entityName.") if T(metadata => 4); |
2211 |
for my $relationshipName (keys %{$relationshipList}) { |
for my $relationshipName (keys %{$relationshipList}) { |
2212 |
my $relationship = $relationshipList->{$relationshipName}; |
my $relationship = $relationshipList->{$relationshipName}; |
2213 |
# Determine if this relationship has our entity in one of its link fields. |
# Determine if this relationship has our entity in one of its link fields. |
2214 |
my $fromEntity = $relationship->{from}; |
my $fromEntity = $relationship->{from}; |
2215 |
my $toEntity = $relationship->{to}; |
my $toEntity = $relationship->{to}; |
2216 |
Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(3); |
Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(4); |
2217 |
if ($fromEntity eq $entityName) { |
if ($fromEntity eq $entityName) { |
2218 |
if ($toEntity eq $entityName) { |
if ($toEntity eq $entityName) { |
2219 |
# Here the relationship is recursive. |
# Here the relationship is recursive. |
2220 |
push @bothList, $relationshipName; |
push @bothList, $relationshipName; |
2221 |
Trace("Relationship $relationshipName put in both-list.") if T(3); |
Trace("Relationship $relationshipName put in both-list.") if T(metadata => 4); |
2222 |
} else { |
} else { |
2223 |
# Here the relationship comes from the entity. |
# Here the relationship comes from the entity. |
2224 |
push @fromList, $relationshipName; |
push @fromList, $relationshipName; |
2225 |
Trace("Relationship $relationshipName put in from-list.") if T(3); |
Trace("Relationship $relationshipName put in from-list.") if T(metadata => 4); |
2226 |
} |
} |
2227 |
} elsif ($toEntity eq $entityName) { |
} elsif ($toEntity eq $entityName) { |
2228 |
# Here the relationship goes to the entity. |
# Here the relationship goes to the entity. |
2229 |
push @toList, $relationshipName; |
push @toList, $relationshipName; |
2230 |
Trace("Relationship $relationshipName put in to-list.") if T(3); |
Trace("Relationship $relationshipName put in to-list.") if T(metadata => 4); |
2231 |
} |
} |
2232 |
} |
} |
2233 |
# Create the nonrecursive joins. Note that we build two hashes for running |
# Create the nonrecursive joins. Note that we build two hashes for running |
2243 |
# Create joins between the entity and this relationship. |
# Create joins between the entity and this relationship. |
2244 |
my $linkField = "$relationshipName.${linkType}_link"; |
my $linkField = "$relationshipName.${linkType}_link"; |
2245 |
my $joinClause = "$entityName.id = $linkField"; |
my $joinClause = "$entityName.id = $linkField"; |
2246 |
Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(4); |
Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(metadata => 4); |
2247 |
$joinTable{"$entityName/$relationshipName"} = $joinClause; |
$joinTable{"$entityName/$relationshipName"} = $joinClause; |
2248 |
$joinTable{"$relationshipName/$entityName"} = $joinClause; |
$joinTable{"$relationshipName/$entityName"} = $joinClause; |
2249 |
# Create joins between this relationship and the other relationships. |
# Create joins between this relationship and the other relationships. |
2264 |
# relationship and itself are prohibited. |
# relationship and itself are prohibited. |
2265 |
my $relJoinClause = "$otherName.${otherType}_link = $linkField"; |
my $relJoinClause = "$otherName.${otherType}_link = $linkField"; |
2266 |
$joinTable{$joinKey} = $relJoinClause; |
$joinTable{$joinKey} = $relJoinClause; |
2267 |
Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(4); |
Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(metadata => 4); |
2268 |
} |
} |
2269 |
} |
} |
2270 |
} |
} |
2273 |
# relationship can only be ambiguous with another recursive relationship, |
# relationship can only be ambiguous with another recursive relationship, |
2274 |
# and the incoming relationship from the outer loop is never recursive. |
# and the incoming relationship from the outer loop is never recursive. |
2275 |
for my $otherName (@bothList) { |
for my $otherName (@bothList) { |
2276 |
Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(3); |
Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(metadata => 4); |
2277 |
# Join from the left. |
# Join from the left. |
2278 |
$joinTable{"$relationshipName/$otherName"} = |
$joinTable{"$relationshipName/$otherName"} = |
2279 |
"$linkField = $otherName.from_link"; |
"$linkField = $otherName.from_link"; |
2288 |
# rise to situations where we can't create the path we want; however, it is always |
# rise to situations where we can't create the path we want; however, it is always |
2289 |
# possible to get the same effect using multiple queries. |
# possible to get the same effect using multiple queries. |
2290 |
for my $relationshipName (@bothList) { |
for my $relationshipName (@bothList) { |
2291 |
Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(3); |
Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(metadata => 4); |
2292 |
# Join to the entity from each direction. |
# Join to the entity from each direction. |
2293 |
$joinTable{"$entityName/$relationshipName"} = |
$joinTable{"$entityName/$relationshipName"} = |
2294 |
"$entityName.id = $relationshipName.from_link"; |
"$entityName.id = $relationshipName.from_link"; |