[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.16, Tue Jun 28 23:51:06 2005 UTC revision 1.24, Wed Oct 12 02:56:04 2005 UTC
# Line 9  Line 9 
9      use DBObject;      use DBObject;
10      use Stats;      use Stats;
11      use Time::HiRes qw(gettimeofday);      use Time::HiRes qw(gettimeofday);
12        use FIG;
13    
14  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
15    
# Line 300  Line 301 
301  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.
302  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field
303  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation
304   #string is specified in the field definition.  # string is specified in the field definition. "avgLen" is the average byte length for estimating
305  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            dataGen => "StringGen('A')" },  # record sizes.
306                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           dataGen => "IntGen(0, 99999999)" },  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            avgLen =>   1, dataGen => "StringGen('A')" },
307                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          dataGen => "StringGen(IntGen(10,250))" },                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           avgLen =>   4, dataGen => "IntGen(0, 99999999)" },
308                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   dataGen => "StringGen(IntGen(80,1000))" },                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          avgLen => 100, dataGen => "StringGen(IntGen(10,250))" },
309                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           dataGen => "DateGen(-7, 7, IntGen(0,1400))" },                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" },
310                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           dataGen => "FloatGen(0.0, 100.0)" },                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" },
311                    boolean => { sqlType => 'SMALLINT',           maxLen => 1,            dataGen => "IntGen(0, 1)" },                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, dataGen => "FloatGen(0.0, 100.0)" },
312                      boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   2, dataGen => "IntGen(0, 1)" },
313                   'key-string' =>                   'key-string' =>
314                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           dataGen => "StringGen(IntGen(10,40))" },                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, dataGen => "StringGen(IntGen(10,40))" },
315                   'name-string' =>                   'name-string' =>
316                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           dataGen => "StringGen(IntGen(10,80))" },                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           avgLen =>  40, dataGen => "StringGen(IntGen(10,80))" },
317                   'medium-string' =>                   'medium-string' =>
318                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          dataGen => "StringGen(IntGen(10,160))" },                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          avgLen =>  40, dataGen => "StringGen(IntGen(10,160))" },
319                  );                  );
320    
321  # Table translating arities into natural language.  # Table translating arities into natural language.
# Line 369  Line 371 
371    
372  =head3 ShowMetaData  =head3 ShowMetaData
373    
374  C<< $database->ShowMetaData($fileName); >>  C<< $erdb->ShowMetaData($fileName); >>
375    
376  This method outputs a description of the database. This description can be used to help users create  This method outputs a description of the database. This description can be used to help users create
377  the data to be loaded into the relations.  the data to be loaded into the relations.
# Line 524  Line 526 
526    
527  =head3 DumpMetaData  =head3 DumpMetaData
528    
529  C<< $database->DumpMetaData(); >>  C<< $erdb->DumpMetaData(); >>
530    
531  Return a dump of the metadata structure.  Return a dump of the metadata structure.
532    
# Line 539  Line 541 
541    
542  =head3 CreateTables  =head3 CreateTables
543    
544  C<< $datanase->CreateTables(); >>  C<< $erdb->CreateTables(); >>
545    
546  This method creates the tables for the database from the metadata structure loaded by the  This method creates the tables for the database from the metadata structure loaded by the
547  constructor. It is expected this function will only be used on rare occasions, when the  constructor. It is expected this function will only be used on rare occasions, when the
# Line 551  Line 553 
553  sub CreateTables {  sub CreateTables {
554      # Get the parameters.      # Get the parameters.
555      my ($self) = @_;      my ($self) = @_;
556      my $metadata = $self->{_metaData};      # Get the relation names.
557      my $dbh = $self->{_dbh};      my @relNames = $self->GetTableNames();
558      # Loop through the entities.      # Loop through the relations.
559      my $entityHash = $metadata->{Entities};      for my $relationName (@relNames) {
     for my $entityName (keys %{$entityHash}) {  
         my $entityData = $entityHash->{$entityName};  
         # Tell the user what we're doing.  
         Trace("Creating relations for entity $entityName.") if T(1);  
         # Loop through the entity's relations.  
         for my $relationName (keys %{$entityData->{Relations}}) {  
560              # Create a table for this relation.              # Create a table for this relation.
561              $self->CreateTable($relationName);              $self->CreateTable($relationName);
562              Trace("Relation $relationName created.") if T(1);          Trace("Relation $relationName created.") if T(2);
         }  
     }  
     # Loop through the relationships.  
     my $relationshipTable = $metadata->{Relationships};  
     for my $relationshipName (keys %{$metadata->{Relationships}}) {  
         # Create a table for this relationship.  
         Trace("Creating relationship $relationshipName.") if T(1);  
         $self->CreateTable($relationshipName);  
563      }      }
564  }  }
565    
566  =head3 CreateTable  =head3 CreateTable
567    
568  C<< $database->CreateTable($tableName, $indexFlag); >>  C<< $erdb->CreateTable($tableName, $indexFlag, $estimatedRows); >>
569    
570  Create the table for a relation and optionally create its indexes.  Create the table for a relation and optionally create its indexes.
571    
# Line 587  Line 575 
575    
576  Name of the relation (which will also be the table name).  Name of the relation (which will also be the table name).
577    
578  =item $indexFlag  =item indexFlag
579    
580  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,
581  L</CreateIndexes> must be called later to bring the indexes into existence.  L</CreateIndexes> must be called later to bring the indexes into existence.
582    
583    =item estimatedRows (optional)
584    
585    If specified, the estimated maximum number of rows for the relation. This
586    information allows the creation of tables using storage engines that are
587    faster but require size estimates, such as MyISAM.
588    
589  =back  =back
590    
591  =cut  =cut
592    
593  sub CreateTable {  sub CreateTable {
594      # Get the parameters.      # Get the parameters.
595      my ($self, $relationName, $indexFlag) = @_;      my ($self, $relationName, $indexFlag, $estimatedRows) = @_;
596      # Get the database handle.      # Get the database handle.
597      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
598      # Get the relation data and determine whether or not the relation is primary.      # Get the relation data and determine whether or not the relation is primary.
# Line 622  Line 616 
616      # Insure the table is not already there.      # Insure the table is not already there.
617      $dbh->drop_table(tbl => $relationName);      $dbh->drop_table(tbl => $relationName);
618      Trace("Table $relationName dropped.") if T(2);      Trace("Table $relationName dropped.") if T(2);
619        # If there are estimated rows, create an estimate so we can take advantage of
620        # faster DB technologies.
621        my $estimation = undef;
622        if ($estimatedRows) {
623            $estimation = [$self->EstimateRowSize($relationName), $estimatedRows];
624        }
625      # Create the table.      # Create the table.
626      Trace("Creating table $relationName: $fieldThing") if T(2);      Trace("Creating table $relationName: $fieldThing") if T(2);
627      $dbh->create_table(tbl => $relationName, flds => $fieldThing);      $dbh->create_table(tbl => $relationName, flds => $fieldThing, estimates => $estimation);
628      Trace("Relation $relationName created in database.") if T(2);      Trace("Relation $relationName created in database.") if T(2);
629      # If we want to build the indexes, we do it here.      # If we want to build the indexes, we do it here.
630      if ($indexFlag) {      if ($indexFlag) {
# Line 634  Line 634 
634    
635  =head3 CreateIndex  =head3 CreateIndex
636    
637  C<< $database->CreateIndex($relationName); >>  C<< $erdb->CreateIndex($relationName); >>
638    
639  Create the indexes for a relation. If a table is being loaded from a large source file (as  Create the indexes for a relation. If a table is being loaded from a large source file (as
640  is the case in L</LoadTable>), it is sometimes best to create the indexes after the load.  is the case in L</LoadTable>), it is sometimes best to create the indexes after the load.
# Line 660  Line 660 
660          # Get the index's uniqueness flag.          # Get the index's uniqueness flag.
661          my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');          my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');
662          # Create the index.          # Create the index.
663          $dbh->create_index(idx => $indexName, tbl => $relationName, flds => $flds, unique => $unique);          my $rv = $dbh->create_index(idx => $indexName, tbl => $relationName,
664                                        flds => $flds, unique => $unique);
665            if ($rv) {
666          Trace("Index created: $indexName for $relationName ($flds)") if T(1);          Trace("Index created: $indexName for $relationName ($flds)") if T(1);
667            } else {
668                Confess("Error creating index $indexName for $relationName using ($flds): " . $dbh->error_message());
669            }
670      }      }
671  }  }
672    
673  =head3 LoadTables  =head3 LoadTables
674    
675  C<< my $stats = $database->LoadTables($directoryName, $rebuild); >>  C<< my $stats = $erdb->LoadTables($directoryName, $rebuild); >>
676    
677  This method will load the database tables from a directory. The tables must already have been created  This method will load the database tables from a directory. The tables must already have been created
678  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;
# Line 710  Line 715 
715      $directoryName =~ s!/\\$!!;      $directoryName =~ s!/\\$!!;
716      # Declare the return variable.      # Declare the return variable.
717      my $retVal = Stats->new();      my $retVal = Stats->new();
718      # Get the metadata structure.      # Get the relation names.
719      my $metaData = $self->{_metaData};      my @relNames = $self->GetTableNames();
720      # Loop through the entities.      for my $relationName (@relNames) {
     for my $entity (values %{$metaData->{Entities}}) {  
         # Loop through the entity's relations.  
         for my $relationName (keys %{$entity->{Relations}}) {  
721              # Try to load this relation.              # Try to load this relation.
722              my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);              my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);
723              # Accumulate the statistics.              # Accumulate the statistics.
724              $retVal->Accumulate($result);              $retVal->Accumulate($result);
725          }          }
     }  
     # Loop through the relationships.  
     for my $relationshipName (keys %{$metaData->{Relationships}}) {  
         # Try to load this relationship's relation.  
         my $result = $self->_LoadRelation($directoryName, $relationshipName, $rebuild);  
         # Accumulate the statistics.  
         $retVal->Accumulate($result);  
     }  
726      # Add the duration of the load to the statistical object.      # Add the duration of the load to the statistical object.
727      $retVal->Add('duration', gettimeofday - $startTime);      $retVal->Add('duration', gettimeofday - $startTime);
728      # Return the accumulated statistics.      # Return the accumulated statistics.
729      return $retVal;      return $retVal;
730  }  }
731    
732    
733  =head3 GetTableNames  =head3 GetTableNames
734    
735  C<< my @names = $database->GetTableNames; >>  C<< my @names = $erdb->GetTableNames; >>
736    
737  Return a list of the relations required to implement this database.  Return a list of the relations required to implement this database.
738    
# Line 754  Line 749 
749    
750  =head3 GetEntityTypes  =head3 GetEntityTypes
751    
752  C<< my @names = $database->GetEntityTypes; >>  C<< my @names = $erdb->GetEntityTypes; >>
753    
754  Return a list of the entity type names.  Return a list of the entity type names.
755    
# Line 769  Line 764 
764      return sort keys %{$entityList};      return sort keys %{$entityList};
765  }  }
766    
767    =head3 IsEntity
768    
769    C<< my $flag = $erdb->IsEntity($entityName); >>
770    
771    Return TRUE if the parameter is an entity name, else FALSE.
772    
773    =over 4
774    
775    =item entityName
776    
777    Object name to be tested.
778    
779    =item RETURN
780    
781    Returns TRUE if the specified string is an entity name, else FALSE.
782    
783    =back
784    
785    =cut
786    
787    sub IsEntity {
788        # Get the parameters.
789        my ($self, $entityName) = @_;
790        # Test to see if it's an entity.
791        return exists $self->{_metaData}->{Entities}->{$entityName};
792    }
793    
794  =head3 Get  =head3 Get
795    
796  C<< my $query = $database->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my $query = $erdb->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
797    
798  This method returns a query object for entities of a specified type using a specified filter.  This method returns a query object for entities of a specified type using a specified filter.
799  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each
# Line 779  Line 801 
801  following call requests all B<Genome> objects for the genus specified in the variable  following call requests all B<Genome> objects for the genus specified in the variable
802  $genus.  $genus.
803    
804  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", $genus); >>
805    
806  The WHERE clause contains a single question mark, so there is a single additional  The WHERE clause contains a single question mark, so there is a single additional
807  parameter representing the parameter value. It would also be possible to code  parameter representing the parameter value. It would also be possible to code
808    
809  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>
810    
811  however, this version of the call would generate a syntax error if there were any quote  however, this version of the call would generate a syntax error if there were any quote
812  characters inside the variable C<$genus>.  characters inside the variable C<$genus>.
# Line 796  Line 818 
818  It is possible to specify multiple entity and relationship names in order to retrieve more than  It is possible to specify multiple entity and relationship names in order to retrieve more than
819  one object's data at the same time, which allows highly complex joined queries. For example,  one object's data at the same time, which allows highly complex joined queries. For example,
820    
821  C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>
822    
823  If multiple names are specified, then the query processor will automatically determine a  If multiple names are specified, then the query processor will automatically determine a
824  join path between the entities and relationships. The algorithm used is very simplistic.  join path between the entities and relationships. The algorithm used is very simplistic.
# Line 959  Line 981 
981              $command .= " ORDER BY $orderClause";              $command .= " ORDER BY $orderClause";
982          }          }
983      }      }
984      Trace("SQL query: $command") if T(2);      Trace("SQL query: $command") if T(3);
985      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(3) && (@params > 0));      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(4) && (@params > 0));
986      my $sth = $dbh->prepare_command($command);      my $sth = $dbh->prepare_command($command);
987      # Execute it with the parameters bound in.      # Execute it with the parameters bound in.
988      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());
# Line 971  Line 993 
993    
994  =head3 GetList  =head3 GetList
995    
996  C<< my @dbObjects = $database->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
997    
998  Return a list of object descriptors for the specified objects as determined by the  Return a list of object descriptors for the specified objects as determined by the
999  specified filter clause.  specified filter clause.
# Line 1034  Line 1056 
1056    
1057  =head3 ComputeObjectSentence  =head3 ComputeObjectSentence
1058    
1059  C<< my $sentence = $database->ComputeObjectSentence($objectName); >>  C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >>
1060    
1061  Check an object name, and if it is a relationship convert it to a relationship sentence.  Check an object name, and if it is a relationship convert it to a relationship sentence.
1062    
# Line 1069  Line 1091 
1091    
1092  =head3 DumpRelations  =head3 DumpRelations
1093    
1094  C<< $database->DumpRelations($outputDirectory); >>  C<< $erdb->DumpRelations($outputDirectory); >>
1095    
1096  Write the contents of all the relations to tab-delimited files in the specified directory.  Write the contents of all the relations to tab-delimited files in the specified directory.
1097  Each file will have the same name as the relation dumped, with an extension of DTX.  Each file will have the same name as the relation dumped, with an extension of DTX.
# Line 1111  Line 1133 
1133    
1134  =head3 InsertObject  =head3 InsertObject
1135    
1136  C<< my $ok = $database->InsertObject($objectType, \%fieldHash); >>  C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >>
1137    
1138  Insert an object into the database. The object is defined by a type name and then a hash  Insert an object into the database. The object is defined by a type name and then a hash
1139  of field names to values. Field values in the primary relation are represented by scalars.  of field names to values. Field values in the primary relation are represented by scalars.
# Line 1120  Line 1142 
1142  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases
1143  C<ZP_00210270.1> and C<gi|46206278>.  C<ZP_00210270.1> and C<gi|46206278>.
1144    
1145  C<< $database->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>  C<< $erdb->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>
1146    
1147  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and
1148  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.
1149    
1150  C<< $database->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>  C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>
1151    
1152  =over 4  =over 4
1153    
# Line 1250  Line 1272 
1272    
1273  =head3 LoadTable  =head3 LoadTable
1274    
1275  C<< my %results = $database->LoadTable($fileName, $relationName, $truncateFlag); >>  C<< my %results = $erdb->LoadTable($fileName, $relationName, $truncateFlag); >>
1276    
1277  Load data from a tab-delimited file into a specified table, optionally re-creating the table  Load data from a tab-delimited file into a specified table, optionally re-creating the table
1278  first.  first.
# Line 1286  Line 1308 
1308      Trace("Loading table $relationName from $fileName") if T(2);      Trace("Loading table $relationName from $fileName") if T(2);
1309      # Get the database handle.      # Get the database handle.
1310      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
1311        # Start a transaction.
1312        $dbh->begin_tran;
1313        # Get the input file size.
1314        my $fileSize = -s $fileName;
1315      # Get the relation data.      # Get the relation data.
1316      my $relation = $self->_FindRelation($relationName);      my $relation = $self->_FindRelation($relationName);
1317      # Check the truncation flag.      # Check the truncation flag.
1318      if ($truncateFlag) {      if ($truncateFlag) {
1319          Trace("Creating table $relationName") if T(2);          Trace("Creating table $relationName") if T(2);
1320            # Compute the row count estimate. We take the size of the load file,
1321            # divide it by the estimated row size, and then multiply by 1.5 to
1322            # leave extra room. We postulate a minimum row count of 1000 to
1323            # prevent problems with incoming empty load files.
1324            my $rowSize = $self->EstimateRowSize($relationName);
1325            my $estimate = FIG::max($fileSize * 1.5 / $rowSize, 1000);
1326          # Re-create the table without its index.          # Re-create the table without its index.
1327          $self->CreateTable($relationName, 0);          $self->CreateTable($relationName, 0, $estimate);
1328          # If this is a pre-index DBMS, create the index here.          # If this is a pre-index DBMS, create the index here.
1329          if ($dbh->{_preIndex}) {          if ($dbh->{_preIndex}) {
1330              eval {              eval {
# Line 1303  Line 1335 
1335              }              }
1336          }          }
1337      }      }
     # Determine whether or not this is a primary relation. Primary relations have an extra  
     # field indicating whether or not a given object is new or was loaded from the flat files.  
     my $primary = $self->_IsPrimary($relationName);  
     # Get the number of fields in this relation.  
     my @fieldList = @{$relation->{Fields}};  
     my $fieldCount = @fieldList;  
     # Start a database transaction.  
     $dbh->begin_tran;  
     # Open the relation file. We need to create a cleaned-up copy before loading.  
     open TABLEIN, '<', $fileName;  
     my $tempName = "$fileName.tbl";  
     open TABLEOUT, '>', $tempName;  
     my $inputCount = 0;  
     # Loop through the file.  
     while (<TABLEIN>) {  
         $inputCount++;  
         # Chop off the new-line character.  
         my $record = Tracer::Strip($_);  
         # Only proceed if the record is non-blank.  
         if ($record) {  
             # Escape all the backslashes found in the line.  
             $record =~ s/\\/\\\\/g;  
             # Insure the number of fields is correct.  
             my @fields = split /\t/, $record;  
             while (@fields > $fieldCount) {  
                 my $extraField = $fields[$#fields];  
                 delete $fields[$#fields];  
                 if ($extraField) {  
                     Trace("Nonblank extra field value \"$extraField\" deleted from record $inputCount of $fileName.") if T(1);  
                 }  
             }  
             while (@fields < $fieldCount) {  
                 push @fields, "";  
             }  
             # If this is a primary relation, add a 0 for the new-record flag (indicating that  
             # this record is not new, but part of the original load).  
             if ($primary) {  
                 push @fields, "0";  
             }  
             # Write the record.  
             $record = join "\t", @fields;  
             print TABLEOUT "$record\n";  
             # Count the record written.  
             my $count = $retVal->Add('records');  
             my $len = length $record;  
             Trace("Record $count written with $len characters.") if T(4);  
         } else {  
             # Here we have a blank record.  
             $retVal->Add('skipped');  
         }  
     }  
     # Close the files.  
     close TABLEIN;  
     close TABLEOUT;  
     Trace("Temporary file $tempName created.") if T(2);  
1338      # Load the table.      # Load the table.
1339      my $rv;      my $rv;
1340      eval {      eval {
1341          $rv = $dbh->load_table(file => $tempName, tbl => $relationName);          $rv = $dbh->load_table(file => $fileName, tbl => $relationName);
1342      };      };
1343      if (!defined $rv) {      if (!defined $rv) {
1344          $retVal->AddMessage($@) if ($@);          $retVal->AddMessage($@) if ($@);
1345          $retVal->AddMessage("Table load failed for $relationName using $tempName.");          $retVal->AddMessage("Table load failed for $relationName using $fileName.");
1346          Trace("Table load failed for $relationName.") if T(1);          Trace("Table load failed for $relationName.") if T(1);
1347      } else {      } else {
1348          # Here we successfully loaded the table. Trace the number of records loaded.          # Here we successfully loaded the table.
1349          Trace("$retVal->{records} records read for $relationName.") if T(2);          $retVal->Add("tables");
1350            my $size = -s $fileName;
1351            Trace("$size bytes loaded into $relationName.") if T(2);
1352          # If we're rebuilding, we need to create the table indexes.          # If we're rebuilding, we need to create the table indexes.
1353          if ($truncateFlag && ! $dbh->{_preIndex}) {          if ($truncateFlag && ! $dbh->{_preIndex}) {
1354              eval {              eval {
# Line 1379  Line 1358 
1358                  $retVal->AddMessage($@);                  $retVal->AddMessage($@);
1359              }              }
1360          }          }
         # Analyze the table to help optimize tables.  
1361      }      }
1362      # Commit the database changes.      # Commit the database changes.
1363      $dbh->commit_tran;      $dbh->commit_tran;
1364        # Analyze the table to improve performance.
1365      $dbh->vacuum_it($relationName);      $dbh->vacuum_it($relationName);
     # Delete the temporary file.  
     unlink $tempName;  
1366      # Return the statistics.      # Return the statistics.
1367      return $retVal;      return $retVal;
1368  }  }
1369    
1370  =head3 GenerateEntity  =head3 GenerateEntity
1371    
1372  C<< my $fieldHash = $database->GenerateEntity($id, $type, \%values); >>  C<< my $fieldHash = $erdb->GenerateEntity($id, $type, \%values); >>
1373    
1374  Generate the data for a new entity instance. This method creates a field hash suitable for  Generate the data for a new entity instance. This method creates a field hash suitable for
1375  passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest  passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest
# Line 1450  Line 1427 
1427    
1428  =head3 GetEntity  =head3 GetEntity
1429    
1430  C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >>  C<< my $entityObject = $erdb->GetEntity($entityType, $ID); >>
1431    
1432  Return an object describing the entity instance with a specified ID.  Return an object describing the entity instance with a specified ID.
1433    
# Line 1486  Line 1463 
1463    
1464  =head3 GetEntityValues  =head3 GetEntityValues
1465    
1466  C<< my @values = GetEntityValues($entityType, $ID, \@fields); >>  C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >>
1467    
1468  Return a list of values from a specified entity instance.  Return a list of values from a specified entity instance.
1469    
# Line 1529  Line 1506 
1506    
1507  =head3 GetAll  =head3 GetAll
1508    
1509  C<< my @list = $sprout->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>  C<< my @list = $erdb->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>
1510    
1511  Return a list of values taken from the objects returned by a query. The first three  Return a list of values taken from the objects returned by a query. The first three
1512  parameters correspond to the parameters of the L</Get> method. The final parameter is  parameters correspond to the parameters of the L</Get> method. The final parameter is
# Line 1545  Line 1522 
1522  spreadsheet cell, and each feature will be represented by a list containing the  spreadsheet cell, and each feature will be represented by a list containing the
1523  feature ID followed by all of its aliases.  feature ID followed by all of its aliases.
1524    
1525  C<< $query = $sprout->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>  C<< $query = $erdb->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>
1526    
1527  =over 4  =over 4
1528    
# Line 1614  Line 1591 
1591      return @retVal;      return @retVal;
1592  }  }
1593    
1594    =head3 EstimateRowSize
1595    
1596    C<< my $rowSize = $erdb->EstimateRowSize($relName); >>
1597    
1598    Estimate the row size of the specified relation. The estimated row size is computed by adding
1599    up the average length for each data type.
1600    
1601    =over 4
1602    
1603    =item relName
1604    
1605    Name of the relation whose estimated row size is desired.
1606    
1607    =item RETURN
1608    
1609    Returns an estimate of the row size for the specified relation.
1610    
1611    =back
1612    
1613    =cut
1614    #: Return Type $;
1615    sub EstimateRowSize {
1616        # Get the parameters.
1617        my ($self, $relName) = @_;
1618        # Declare the return variable.
1619        my $retVal = 0;
1620        # Find the relation descriptor.
1621        my $relation = $self->_FindRelation($relName);
1622        # Get the list of fields.
1623        for my $fieldData (@{$relation->{Fields}}) {
1624            # Get the field type and add its length.
1625            my $fieldLen = $TypeTable{$fieldData->{type}}->{avgLen};
1626            $retVal += $fieldLen;
1627        }
1628        # Return the result.
1629        return $retVal;
1630    }
1631    
1632  =head2 Internal Utility Methods  =head2 Internal Utility Methods
1633    
1634  =head3 GetLoadStats  =head3 GetLoadStats
# Line 2196  Line 2211 
2211          my @fromList = ();          my @fromList = ();
2212          my @toList = ();          my @toList = ();
2213          my @bothList = ();          my @bothList = ();
2214          Trace("Join table build for $entityName.") if T(3);          Trace("Join table build for $entityName.") if T(metadata => 4);
2215          for my $relationshipName (keys %{$relationshipList}) {          for my $relationshipName (keys %{$relationshipList}) {
2216              my $relationship = $relationshipList->{$relationshipName};              my $relationship = $relationshipList->{$relationshipName};
2217              # Determine if this relationship has our entity in one of its link fields.              # Determine if this relationship has our entity in one of its link fields.
2218              my $fromEntity = $relationship->{from};              my $fromEntity = $relationship->{from};
2219              my $toEntity = $relationship->{to};              my $toEntity = $relationship->{to};
2220              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(3);              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(4);
2221              if ($fromEntity eq $entityName) {              if ($fromEntity eq $entityName) {
2222                  if ($toEntity eq $entityName) {                  if ($toEntity eq $entityName) {
2223                      # Here the relationship is recursive.                      # Here the relationship is recursive.
2224                      push @bothList, $relationshipName;                      push @bothList, $relationshipName;
2225                      Trace("Relationship $relationshipName put in both-list.") if T(3);                      Trace("Relationship $relationshipName put in both-list.") if T(metadata => 4);
2226                  } else {                  } else {
2227                      # Here the relationship comes from the entity.                      # Here the relationship comes from the entity.
2228                      push @fromList, $relationshipName;                      push @fromList, $relationshipName;
2229                      Trace("Relationship $relationshipName put in from-list.") if T(3);                      Trace("Relationship $relationshipName put in from-list.") if T(metadata => 4);
2230                  }                  }
2231              } elsif ($toEntity eq $entityName) {              } elsif ($toEntity eq $entityName) {
2232                  # Here the relationship goes to the entity.                  # Here the relationship goes to the entity.
2233                  push @toList, $relationshipName;                  push @toList, $relationshipName;
2234                  Trace("Relationship $relationshipName put in to-list.") if T(3);                  Trace("Relationship $relationshipName put in to-list.") if T(metadata => 4);
2235              }              }
2236          }          }
2237          # Create the nonrecursive joins. Note that we build two hashes for running          # Create the nonrecursive joins. Note that we build two hashes for running
# Line 2232  Line 2247 
2247                  # Create joins between the entity and this relationship.                  # Create joins between the entity and this relationship.
2248                  my $linkField = "$relationshipName.${linkType}_link";                  my $linkField = "$relationshipName.${linkType}_link";
2249                  my $joinClause = "$entityName.id = $linkField";                  my $joinClause = "$entityName.id = $linkField";
2250                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(4);                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(metadata => 4);
2251                  $joinTable{"$entityName/$relationshipName"} = $joinClause;                  $joinTable{"$entityName/$relationshipName"} = $joinClause;
2252                  $joinTable{"$relationshipName/$entityName"} = $joinClause;                  $joinTable{"$relationshipName/$entityName"} = $joinClause;
2253                  # Create joins between this relationship and the other relationships.                  # Create joins between this relationship and the other relationships.
# Line 2253  Line 2268 
2268                              # relationship and itself are prohibited.                              # relationship and itself are prohibited.
2269                              my $relJoinClause = "$otherName.${otherType}_link = $linkField";                              my $relJoinClause = "$otherName.${otherType}_link = $linkField";
2270                              $joinTable{$joinKey} = $relJoinClause;                              $joinTable{$joinKey} = $relJoinClause;
2271                              Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(4);                              Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(metadata => 4);
2272                          }                          }
2273                      }                      }
2274                  }                  }
# Line 2262  Line 2277 
2277                  # relationship can only be ambiguous with another recursive relationship,                  # relationship can only be ambiguous with another recursive relationship,
2278                  # and the incoming relationship from the outer loop is never recursive.                  # and the incoming relationship from the outer loop is never recursive.
2279                  for my $otherName (@bothList) {                  for my $otherName (@bothList) {
2280                      Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(3);                      Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(metadata => 4);
2281                      # Join from the left.                      # Join from the left.
2282                      $joinTable{"$relationshipName/$otherName"} =                      $joinTable{"$relationshipName/$otherName"} =
2283                          "$linkField = $otherName.from_link";                          "$linkField = $otherName.from_link";
# Line 2277  Line 2292 
2292          # rise to situations where we can't create the path we want; however, it is always          # rise to situations where we can't create the path we want; however, it is always
2293          # possible to get the same effect using multiple queries.          # possible to get the same effect using multiple queries.
2294          for my $relationshipName (@bothList) {          for my $relationshipName (@bothList) {
2295              Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(3);              Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(metadata => 4);
2296              # Join to the entity from each direction.              # Join to the entity from each direction.
2297              $joinTable{"$entityName/$relationshipName"} =              $joinTable{"$entityName/$relationshipName"} =
2298                  "$entityName.id = $relationshipName.from_link";                  "$entityName.id = $relationshipName.from_link";

Legend:
Removed from v.1.16  
changed lines
  Added in v.1.24

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3