[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.12, Fri Jun 24 18:31:52 2005 UTC revision 1.26, Tue Oct 18 06:47:46 2005 UTC
# Line 2  Line 2 
2    
3      use strict;      use strict;
4      use Tracer;      use Tracer;
5      use DBKernel;      use DBrtns;
6      use Data::Dumper;      use Data::Dumper;
7      use XML::Simple;      use XML::Simple;
8      use DBQuery;      use DBQuery;
9      use DBObject;      use DBObject;
10      use Stats;      use Stats;
11      use Time::HiRes qw(gettimeofday);      use Time::HiRes qw(gettimeofday);
12        use FIG;
13    
14  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
15    
# Line 300  Line 301 
301  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.
302  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field
303  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation
304   #string is specified in the field definition.  # string is specified in the field definition. "avgLen" is the average byte length for estimating
305  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            dataGen => "StringGen('A')" },  # record sizes.
306                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           dataGen => "IntGen(0, 99999999)" },  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            avgLen =>   1, dataGen => "StringGen('A')" },
307                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          dataGen => "StringGen(IntGen(10,250))" },                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           avgLen =>   4, dataGen => "IntGen(0, 99999999)" },
308                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   dataGen => "StringGen(IntGen(80,1000))" },                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          avgLen => 100, dataGen => "StringGen(IntGen(10,250))" },
309                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           dataGen => "DateGen(-7, 7, IntGen(0,1400))" },                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" },
310                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           dataGen => "FloatGen(0.0, 100.0)" },                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" },
311                    boolean => { sqlType => 'SMALLINT',           maxLen => 1,            dataGen => "IntGen(0, 1)" },                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, dataGen => "FloatGen(0.0, 100.0)" },
312                      boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   2, dataGen => "IntGen(0, 1)" },
313                   'key-string' =>                   'key-string' =>
314                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           dataGen => "StringGen(IntGen(10,40))" },                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, dataGen => "StringGen(IntGen(10,40))" },
315                   'name-string' =>                   'name-string' =>
316                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           dataGen => "StringGen(IntGen(10,80))" },                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           avgLen =>  40, dataGen => "StringGen(IntGen(10,80))" },
317                   'medium-string' =>                   'medium-string' =>
318                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          dataGen => "StringGen(IntGen(10,160))" },                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          avgLen =>  40, dataGen => "StringGen(IntGen(10,160))" },
319                  );                  );
320    
321  # Table translating arities into natural language.  # Table translating arities into natural language.
# Line 369  Line 371 
371    
372  =head3 ShowMetaData  =head3 ShowMetaData
373    
374  C<< $database->ShowMetaData($fileName); >>  C<< $erdb->ShowMetaData($fileName); >>
375    
376  This method outputs a description of the database. This description can be used to help users create  This method outputs a description of the database. This description can be used to help users create
377  the data to be loaded into the relations.  the data to be loaded into the relations.
# Line 524  Line 526 
526    
527  =head3 DumpMetaData  =head3 DumpMetaData
528    
529  C<< $database->DumpMetaData(); >>  C<< $erdb->DumpMetaData(); >>
530    
531  Return a dump of the metadata structure.  Return a dump of the metadata structure.
532    
# Line 539  Line 541 
541    
542  =head3 CreateTables  =head3 CreateTables
543    
544  C<< $datanase->CreateTables(); >>  C<< $erdb->CreateTables(); >>
545    
546  This method creates the tables for the database from the metadata structure loaded by the  This method creates the tables for the database from the metadata structure loaded by the
547  constructor. It is expected this function will only be used on rare occasions, when the  constructor. It is expected this function will only be used on rare occasions, when the
# Line 551  Line 553 
553  sub CreateTables {  sub CreateTables {
554      # Get the parameters.      # Get the parameters.
555      my ($self) = @_;      my ($self) = @_;
556      my $metadata = $self->{_metaData};      # Get the relation names.
557      my $dbh = $self->{_dbh};      my @relNames = $self->GetTableNames();
558      # Loop through the entities.      # Loop through the relations.
559      my $entityHash = $metadata->{Entities};      for my $relationName (@relNames) {
     for my $entityName (keys %{$entityHash}) {  
         my $entityData = $entityHash->{$entityName};  
         # Tell the user what we're doing.  
         Trace("Creating relations for entity $entityName.") if T(1);  
         # Loop through the entity's relations.  
         for my $relationName (keys %{$entityData->{Relations}}) {  
560              # Create a table for this relation.              # Create a table for this relation.
561              $self->CreateTable($relationName);              $self->CreateTable($relationName);
562              Trace("Relation $relationName created.") if T(1);          Trace("Relation $relationName created.") if T(2);
         }  
     }  
     # Loop through the relationships.  
     my $relationshipTable = $metadata->{Relationships};  
     for my $relationshipName (keys %{$metadata->{Relationships}}) {  
         # Create a table for this relationship.  
         Trace("Creating relationship $relationshipName.") if T(1);  
         $self->CreateTable($relationshipName);  
563      }      }
564  }  }
565    
566  =head3 CreateTable  =head3 CreateTable
567    
568  C<< $database->CreateTable($tableName, $indexFlag); >>  C<< $erdb->CreateTable($tableName, $indexFlag, $estimatedRows); >>
569    
570  Create the table for a relation and optionally create its indexes.  Create the table for a relation and optionally create its indexes.
571    
# Line 587  Line 575 
575    
576  Name of the relation (which will also be the table name).  Name of the relation (which will also be the table name).
577    
578  =item $indexFlag  =item indexFlag
579    
580  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,
581  L</CreateIndexes> must be called later to bring the indexes into existence.  L</CreateIndexes> must be called later to bring the indexes into existence.
582    
583    =item estimatedRows (optional)
584    
585    If specified, the estimated maximum number of rows for the relation. This
586    information allows the creation of tables using storage engines that are
587    faster but require size estimates, such as MyISAM.
588    
589  =back  =back
590    
591  =cut  =cut
592    
593  sub CreateTable {  sub CreateTable {
594      # Get the parameters.      # Get the parameters.
595      my ($self, $relationName, $indexFlag) = @_;      my ($self, $relationName, $indexFlag, $estimatedRows) = @_;
596      # Get the database handle.      # Get the database handle.
597      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
598      # Get the relation data and determine whether or not the relation is primary.      # Get the relation data and determine whether or not the relation is primary.
# Line 622  Line 616 
616      # Insure the table is not already there.      # Insure the table is not already there.
617      $dbh->drop_table(tbl => $relationName);      $dbh->drop_table(tbl => $relationName);
618      Trace("Table $relationName dropped.") if T(2);      Trace("Table $relationName dropped.") if T(2);
619        # If there are estimated rows, create an estimate so we can take advantage of
620        # faster DB technologies.
621        my $estimation = undef;
622        if ($estimatedRows) {
623            $estimation = [$self->EstimateRowSize($relationName), $estimatedRows];
624        }
625      # Create the table.      # Create the table.
626      Trace("Creating table $relationName: $fieldThing") if T(2);      Trace("Creating table $relationName: $fieldThing") if T(2);
627      $dbh->create_table(tbl => $relationName, flds => $fieldThing);      $dbh->create_table(tbl => $relationName, flds => $fieldThing, estimates => $estimation);
628      Trace("Relation $relationName created in database.") if T(2);      Trace("Relation $relationName created in database.") if T(2);
629      # If we want to build the indexes, we do it here.      # If we want to build the indexes, we do it here.
630      if ($indexFlag) {      if ($indexFlag) {
# Line 634  Line 634 
634    
635  =head3 CreateIndex  =head3 CreateIndex
636    
637  C<< $database->CreateIndex($relationName); >>  C<< $erdb->CreateIndex($relationName); >>
638    
639  Create the indexes for a relation. If a table is being loaded from a large source file (as  Create the indexes for a relation. If a table is being loaded from a large source file (as
640  is the case in L</LoadTable>), it is sometimes best to create the indexes after the load.  is the case in L</LoadTable>), it is sometimes best to create the indexes after the load.
# Line 660  Line 660 
660          # Get the index's uniqueness flag.          # Get the index's uniqueness flag.
661          my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');          my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');
662          # Create the index.          # Create the index.
663          $dbh->create_index(idx => $indexName, tbl => $relationName, flds => $flds, unique => $unique);          my $rv = $dbh->create_index(idx => $indexName, tbl => $relationName,
664                                        flds => $flds, unique => $unique);
665            if ($rv) {
666          Trace("Index created: $indexName for $relationName ($flds)") if T(1);          Trace("Index created: $indexName for $relationName ($flds)") if T(1);
667            } else {
668                Confess("Error creating index $indexName for $relationName using ($flds): " . $dbh->error_message());
669            }
670      }      }
671  }  }
672    
673  =head3 LoadTables  =head3 LoadTables
674    
675  C<< my $stats = $database->LoadTables($directoryName, $rebuild); >>  C<< my $stats = $erdb->LoadTables($directoryName, $rebuild); >>
676    
677  This method will load the database tables from a directory. The tables must already have been created  This method will load the database tables from a directory. The tables must already have been created
678  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;
# Line 710  Line 715 
715      $directoryName =~ s!/\\$!!;      $directoryName =~ s!/\\$!!;
716      # Declare the return variable.      # Declare the return variable.
717      my $retVal = Stats->new();      my $retVal = Stats->new();
718      # Get the metadata structure.      # Get the relation names.
719      my $metaData = $self->{_metaData};      my @relNames = $self->GetTableNames();
720      # Loop through the entities.      for my $relationName (@relNames) {
     for my $entity (values %{$metaData->{Entities}}) {  
         # Loop through the entity's relations.  
         for my $relationName (keys %{$entity->{Relations}}) {  
721              # Try to load this relation.              # Try to load this relation.
722              my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);              my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);
723              # Accumulate the statistics.              # Accumulate the statistics.
724              $retVal->Accumulate($result);              $retVal->Accumulate($result);
725          }          }
     }  
     # Loop through the relationships.  
     for my $relationshipName (keys %{$metaData->{Relationships}}) {  
         # Try to load this relationship's relation.  
         my $result = $self->_LoadRelation($directoryName, $relationshipName, $rebuild);  
         # Accumulate the statistics.  
         $retVal->Accumulate($result);  
     }  
726      # Add the duration of the load to the statistical object.      # Add the duration of the load to the statistical object.
727      $retVal->Add('duration', gettimeofday - $startTime);      $retVal->Add('duration', gettimeofday - $startTime);
728      # Return the accumulated statistics.      # Return the accumulated statistics.
729      return $retVal;      return $retVal;
730  }  }
731    
732    
733  =head3 GetTableNames  =head3 GetTableNames
734    
735  C<< my @names = $database->GetTableNames; >>  C<< my @names = $erdb->GetTableNames; >>
736    
737  Return a list of the relations required to implement this database.  Return a list of the relations required to implement this database.
738    
# Line 754  Line 749 
749    
750  =head3 GetEntityTypes  =head3 GetEntityTypes
751    
752  C<< my @names = $database->GetEntityTypes; >>  C<< my @names = $erdb->GetEntityTypes; >>
753    
754  Return a list of the entity type names.  Return a list of the entity type names.
755    
# Line 769  Line 764 
764      return sort keys %{$entityList};      return sort keys %{$entityList};
765  }  }
766    
767    =head3 IsEntity
768    
769    C<< my $flag = $erdb->IsEntity($entityName); >>
770    
771    Return TRUE if the parameter is an entity name, else FALSE.
772    
773    =over 4
774    
775    =item entityName
776    
777    Object name to be tested.
778    
779    =item RETURN
780    
781    Returns TRUE if the specified string is an entity name, else FALSE.
782    
783    =back
784    
785    =cut
786    
787    sub IsEntity {
788        # Get the parameters.
789        my ($self, $entityName) = @_;
790        # Test to see if it's an entity.
791        return exists $self->{_metaData}->{Entities}->{$entityName};
792    }
793    
794  =head3 Get  =head3 Get
795    
796  C<< my $query = $database->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my $query = $erdb->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
797    
798  This method returns a query object for entities of a specified type using a specified filter.  This method returns a query object for entities of a specified type using a specified filter.
799  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each
# Line 779  Line 801 
801  following call requests all B<Genome> objects for the genus specified in the variable  following call requests all B<Genome> objects for the genus specified in the variable
802  $genus.  $genus.
803    
804  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", $genus); >>
805    
806  The WHERE clause contains a single question mark, so there is a single additional  The WHERE clause contains a single question mark, so there is a single additional
807  parameter representing the parameter value. It would also be possible to code  parameter representing the parameter value. It would also be possible to code
808    
809  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>
810    
811  however, this version of the call would generate a syntax error if there were any quote  however, this version of the call would generate a syntax error if there were any quote
812  characters inside the variable C<$genus>.  characters inside the variable C<$genus>.
# Line 796  Line 818 
818  It is possible to specify multiple entity and relationship names in order to retrieve more than  It is possible to specify multiple entity and relationship names in order to retrieve more than
819  one object's data at the same time, which allows highly complex joined queries. For example,  one object's data at the same time, which allows highly complex joined queries. For example,
820    
821  C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>
822    
823  If multiple names are specified, then the query processor will automatically determine a  If multiple names are specified, then the query processor will automatically determine a
824  join path between the entities and relationships. The algorithm used is very simplistic.  join path between the entities and relationships. The algorithm used is very simplistic.
# Line 855  Line 877 
877      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
878      my $command = "SELECT DISTINCT " . join('.*, ', @{$objectNames}) . ".* FROM " .      my $command = "SELECT DISTINCT " . join('.*, ', @{$objectNames}) . ".* FROM " .
879                  join(', ', @{$objectNames});                  join(', ', @{$objectNames});
880        Trace("SQL = $command") if T(SQL => 4);
881      # Check for a filter clause.      # Check for a filter clause.
882      if ($filterClause) {      if ($filterClause) {
883          # Here we have one, so we convert its field names and add it to the query. First,          # Here we have one, so we convert its field names and add it to the query. First,
# Line 959  Line 982 
982              $command .= " ORDER BY $orderClause";              $command .= " ORDER BY $orderClause";
983          }          }
984      }      }
985      Trace("SQL query: $command") if T(2);      Trace("SQL query: $command") if T(3);
986      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(3) && (@params > 0));      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(4) && (@params > 0));
987      my $sth = $dbh->prepare_command($command);      my $sth = $dbh->prepare_command($command);
988      # Execute it with the parameters bound in.      # Execute it with the parameters bound in.
989      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());
# Line 971  Line 994 
994    
995  =head3 GetList  =head3 GetList
996    
997  C<< my @dbObjects = $database->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
998    
999  Return a list of object descriptors for the specified objects as determined by the  Return a list of object descriptors for the specified objects as determined by the
1000  specified filter clause.  specified filter clause.
# Line 1034  Line 1057 
1057    
1058  =head3 ComputeObjectSentence  =head3 ComputeObjectSentence
1059    
1060  C<< my $sentence = $database->ComputeObjectSentence($objectName); >>  C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >>
1061    
1062  Check an object name, and if it is a relationship convert it to a relationship sentence.  Check an object name, and if it is a relationship convert it to a relationship sentence.
1063    
# Line 1069  Line 1092 
1092    
1093  =head3 DumpRelations  =head3 DumpRelations
1094    
1095  C<< $database->DumpRelations($outputDirectory); >>  C<< $erdb->DumpRelations($outputDirectory); >>
1096    
1097  Write the contents of all the relations to tab-delimited files in the specified directory.  Write the contents of all the relations to tab-delimited files in the specified directory.
1098  Each file will have the same name as the relation dumped, with an extension of DTX.  Each file will have the same name as the relation dumped, with an extension of DTX.
# Line 1111  Line 1134 
1134    
1135  =head3 InsertObject  =head3 InsertObject
1136    
1137  C<< my $ok = $database->InsertObject($objectType, \%fieldHash); >>  C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >>
1138    
1139  Insert an object into the database. The object is defined by a type name and then a hash  Insert an object into the database. The object is defined by a type name and then a hash
1140  of field names to values. Field values in the primary relation are represented by scalars.  of field names to values. Field values in the primary relation are represented by scalars.
# Line 1120  Line 1143 
1143  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases
1144  C<ZP_00210270.1> and C<gi|46206278>.  C<ZP_00210270.1> and C<gi|46206278>.
1145    
1146  C<< $database->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>  C<< $erdb->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>
1147    
1148  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and
1149  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.
1150    
1151  C<< $database->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>  C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>
1152    
1153  =over 4  =over 4
1154    
# Line 1250  Line 1273 
1273    
1274  =head3 LoadTable  =head3 LoadTable
1275    
1276  C<< my %results = $database->LoadTable($fileName, $relationName, $truncateFlag); >>  C<< my %results = $erdb->LoadTable($fileName, $relationName, $truncateFlag); >>
1277    
1278  Load data from a tab-delimited file into a specified table, optionally re-creating the table  Load data from a tab-delimited file into a specified table, optionally re-creating the table
1279  first.  first.
# Line 1286  Line 1309 
1309      Trace("Loading table $relationName from $fileName") if T(2);      Trace("Loading table $relationName from $fileName") if T(2);
1310      # Get the database handle.      # Get the database handle.
1311      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
1312        # Get the input file size.
1313        my $fileSize = -s $fileName;
1314      # Get the relation data.      # Get the relation data.
1315      my $relation = $self->_FindRelation($relationName);      my $relation = $self->_FindRelation($relationName);
1316      # Check the truncation flag.      # Check the truncation flag.
1317      if ($truncateFlag) {      if ($truncateFlag) {
1318          Trace("Creating table $relationName") if T(2);          Trace("Creating table $relationName") if T(2);
1319            # Compute the row count estimate. We take the size of the load file,
1320            # divide it by the estimated row size, and then multiply by 1.5 to
1321            # leave extra room. We postulate a minimum row count of 1000 to
1322            # prevent problems with incoming empty load files.
1323            my $rowSize = $self->EstimateRowSize($relationName);
1324            my $estimate = FIG::max($fileSize * 1.5 / $rowSize, 1000);
1325          # Re-create the table without its index.          # Re-create the table without its index.
1326          $self->CreateTable($relationName, 0);          $self->CreateTable($relationName, 0, $estimate);
1327          # If this is a pre-index DBMS, create the index here.          # If this is a pre-index DBMS, create the index here.
1328          if ($dbh->{_preIndex}) {          if ($dbh->{_preIndex}) {
1329              eval {              eval {
# Line 1303  Line 1334 
1334              }              }
1335          }          }
1336      }      }
     # Determine whether or not this is a primary relation. Primary relations have an extra  
     # field indicating whether or not a given object is new or was loaded from the flat files.  
     my $primary = $self->_IsPrimary($relationName);  
     # Get the number of fields in this relation.  
     my @fieldList = @{$relation->{Fields}};  
     my $fieldCount = @fieldList;  
     # Start a database transaction.  
     $dbh->begin_tran;  
     # Open the relation file. We need to create a cleaned-up copy before loading.  
     open TABLEIN, '<', $fileName;  
     my $tempName = "$fileName.tbl";  
     open TABLEOUT, '>', $tempName;  
     my $inputCount = 0;  
     # Loop through the file.  
     while (<TABLEIN>) {  
         $inputCount++;  
         # Chop off the new-line character.  
         my $record = Tracer::Strip($_);  
         # Only proceed if the record is non-blank.  
         if ($record) {  
             # Escape all the backslashes found in the line.  
             $record =~ s/\\/\\\\/g;  
             # Insure the number of fields is correct.  
             my @fields = split /\t/, $record;  
             while (@fields > $fieldCount) {  
                 my $extraField = $fields[$#fields];  
                 delete $fields[$#fields];  
                 if ($extraField) {  
                     Trace("Nonblank extra field value \"$extraField\" deleted from record $inputCount of $fileName.") if T(1);  
                 }  
             }  
             while (@fields < $fieldCount) {  
                 push @fields, "";  
             }  
             # If this is a primary relation, add a 0 for the new-record flag (indicating that  
             # this record is not new, but part of the original load).  
             if ($primary) {  
                 push @fields, "0";  
             }  
             # Write the record.  
             $record = join "\t", @fields;  
             print TABLEOUT "$record\n";  
             # Count the record written.  
             my $count = $retVal->Add('records');  
             my $len = length $record;  
             Trace("Record $count written with $len characters.") if T(4);  
         } else {  
             # Here we have a blank record.  
             $retVal->Add('skipped');  
         }  
     }  
     # Close the files.  
     close TABLEIN;  
     close TABLEOUT;  
     Trace("Temporary file $tempName created.") if T(2);  
1337      # Load the table.      # Load the table.
1338      my $rv;      my $rv;
1339      eval {      eval {
1340          $rv = $dbh->load_table(file => $tempName, tbl => $relationName);          $rv = $dbh->load_table(file => $fileName, tbl => $relationName);
1341      };      };
1342      if (!defined $rv) {      if (!defined $rv) {
1343          $retVal->AddMessage($@) if ($@);          $retVal->AddMessage($@) if ($@);
1344          $retVal->AddMessage("Table load failed for $relationName using $tempName.");          $retVal->AddMessage("Table load failed for $relationName using $fileName.");
1345          Trace("Table load failed for $relationName.") if T(1);          Trace("Table load failed for $relationName.") if T(1);
1346      } else {      } else {
1347          # Here we successfully loaded the table. Trace the number of records loaded.          # Here we successfully loaded the table.
1348          Trace("$retVal->{records} records read for $relationName.") if T(2);          $retVal->Add("tables");
1349            my $size = -s $fileName;
1350            Trace("$size bytes loaded into $relationName.") if T(2);
1351          # If we're rebuilding, we need to create the table indexes.          # If we're rebuilding, we need to create the table indexes.
1352          if ($truncateFlag && ! $dbh->{_preIndex}) {          if ($truncateFlag && ! $dbh->{_preIndex}) {
1353              eval {              eval {
# Line 1380  Line 1358 
1358              }              }
1359          }          }
1360      }      }
1361      # Commit the database changes.      # Analyze the table to improve performance.
1362      $dbh->commit_tran;      $dbh->vacuum_it($relationName);
     # Delete the temporary file.  
     unlink $tempName;  
1363      # Return the statistics.      # Return the statistics.
1364      return $retVal;      return $retVal;
1365  }  }
1366    
1367  =head3 GenerateEntity  =head3 GenerateEntity
1368    
1369  C<< my $fieldHash = $database->GenerateEntity($id, $type, \%values); >>  C<< my $fieldHash = $erdb->GenerateEntity($id, $type, \%values); >>
1370    
1371  Generate the data for a new entity instance. This method creates a field hash suitable for  Generate the data for a new entity instance. This method creates a field hash suitable for
1372  passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest  passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest
# Line 1448  Line 1424 
1424    
1425  =head3 GetEntity  =head3 GetEntity
1426    
1427  C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >>  C<< my $entityObject = $erdb->GetEntity($entityType, $ID); >>
1428    
1429  Return an object describing the entity instance with a specified ID.  Return an object describing the entity instance with a specified ID.
1430    
# Line 1484  Line 1460 
1460    
1461  =head3 GetEntityValues  =head3 GetEntityValues
1462    
1463  C<< my @values = GetEntityValues($entityType, $ID, \@fields); >>  C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >>
1464    
1465  Return a list of values from a specified entity instance.  Return a list of values from a specified entity instance.
1466    
# Line 1527  Line 1503 
1503    
1504  =head3 GetAll  =head3 GetAll
1505    
1506  C<< my @list = $sprout->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>  C<< my @list = $erdb->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>
1507    
1508  Return a list of values taken from the objects returned by a query. The first three  Return a list of values taken from the objects returned by a query. The first three
1509  parameters correspond to the parameters of the L</Get> method. The final parameter is  parameters correspond to the parameters of the L</Get> method. The final parameter is
# Line 1543  Line 1519 
1519  spreadsheet cell, and each feature will be represented by a list containing the  spreadsheet cell, and each feature will be represented by a list containing the
1520  feature ID followed by all of its aliases.  feature ID followed by all of its aliases.
1521    
1522  C<< $query = $sprout->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>  C<< $query = $erdb->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>
1523    
1524  =over 4  =over 4
1525    
# Line 1612  Line 1588 
1588      return @retVal;      return @retVal;
1589  }  }
1590    
1591    =head3 EstimateRowSize
1592    
1593    C<< my $rowSize = $erdb->EstimateRowSize($relName); >>
1594    
1595    Estimate the row size of the specified relation. The estimated row size is computed by adding
1596    up the average length for each data type.
1597    
1598    =over 4
1599    
1600    =item relName
1601    
1602    Name of the relation whose estimated row size is desired.
1603    
1604    =item RETURN
1605    
1606    Returns an estimate of the row size for the specified relation.
1607    
1608    =back
1609    
1610    =cut
1611    #: Return Type $;
1612    sub EstimateRowSize {
1613        # Get the parameters.
1614        my ($self, $relName) = @_;
1615        # Declare the return variable.
1616        my $retVal = 0;
1617        # Find the relation descriptor.
1618        my $relation = $self->_FindRelation($relName);
1619        # Get the list of fields.
1620        for my $fieldData (@{$relation->{Fields}}) {
1621            # Get the field type and add its length.
1622            my $fieldLen = $TypeTable{$fieldData->{type}}->{avgLen};
1623            $retVal += $fieldLen;
1624        }
1625        # Return the result.
1626        return $retVal;
1627    }
1628    
1629  =head2 Internal Utility Methods  =head2 Internal Utility Methods
1630    
1631  =head3 GetLoadStats  =head3 GetLoadStats
# Line 1987  Line 2001 
2001  sub _LoadMetaData {  sub _LoadMetaData {
2002      # Get the parameters.      # Get the parameters.
2003      my ($filename) = @_;      my ($filename) = @_;
2004        Trace("Reading Sprout DBD from $filename.") if T(2);
2005      # Slurp the XML file into a variable. Extensive use of options is used to insure we      # Slurp the XML file into a variable. Extensive use of options is used to insure we
2006      # get the exact structure we want.      # get the exact structure we want.
2007      my $metadata = XML::Simple::XMLin($filename,      my $metadata = XML::Simple::XMLin($filename,
# Line 2193  Line 2208 
2208          my @fromList = ();          my @fromList = ();
2209          my @toList = ();          my @toList = ();
2210          my @bothList = ();          my @bothList = ();
2211          Trace("Join table build for $entityName.") if T(3);          Trace("Join table build for $entityName.") if T(metadata => 4);
2212          for my $relationshipName (keys %{$relationshipList}) {          for my $relationshipName (keys %{$relationshipList}) {
2213              my $relationship = $relationshipList->{$relationshipName};              my $relationship = $relationshipList->{$relationshipName};
2214              # Determine if this relationship has our entity in one of its link fields.              # Determine if this relationship has our entity in one of its link fields.
2215              my $fromEntity = $relationship->{from};              my $fromEntity = $relationship->{from};
2216              my $toEntity = $relationship->{to};              my $toEntity = $relationship->{to};
2217              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(3);              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(4);
2218              if ($fromEntity eq $entityName) {              if ($fromEntity eq $entityName) {
2219                  if ($toEntity eq $entityName) {                  if ($toEntity eq $entityName) {
2220                      # Here the relationship is recursive.                      # Here the relationship is recursive.
2221                      push @bothList, $relationshipName;                      push @bothList, $relationshipName;
2222                      Trace("Relationship $relationshipName put in both-list.") if T(3);                      Trace("Relationship $relationshipName put in both-list.") if T(metadata => 4);
2223                  } else {                  } else {
2224                      # Here the relationship comes from the entity.                      # Here the relationship comes from the entity.
2225                      push @fromList, $relationshipName;                      push @fromList, $relationshipName;
2226                      Trace("Relationship $relationshipName put in from-list.") if T(3);                      Trace("Relationship $relationshipName put in from-list.") if T(metadata => 4);
2227                  }                  }
2228              } elsif ($toEntity eq $entityName) {              } elsif ($toEntity eq $entityName) {
2229                  # Here the relationship goes to the entity.                  # Here the relationship goes to the entity.
2230                  push @toList, $relationshipName;                  push @toList, $relationshipName;
2231                  Trace("Relationship $relationshipName put in to-list.") if T(3);                  Trace("Relationship $relationshipName put in to-list.") if T(metadata => 4);
2232              }              }
2233          }          }
2234          # Create the nonrecursive joins. Note that we build two hashes for running          # Create the nonrecursive joins. Note that we build two hashes for running
# Line 2229  Line 2244 
2244                  # Create joins between the entity and this relationship.                  # Create joins between the entity and this relationship.
2245                  my $linkField = "$relationshipName.${linkType}_link";                  my $linkField = "$relationshipName.${linkType}_link";
2246                  my $joinClause = "$entityName.id = $linkField";                  my $joinClause = "$entityName.id = $linkField";
2247                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(4);                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(metadata => 4);
2248                  $joinTable{"$entityName/$relationshipName"} = $joinClause;                  $joinTable{"$entityName/$relationshipName"} = $joinClause;
2249                  $joinTable{"$relationshipName/$entityName"} = $joinClause;                  $joinTable{"$relationshipName/$entityName"} = $joinClause;
2250                  # Create joins between this relationship and the other relationships.                  # Create joins between this relationship and the other relationships.
# Line 2250  Line 2265 
2265                              # relationship and itself are prohibited.                              # relationship and itself are prohibited.
2266                              my $relJoinClause = "$otherName.${otherType}_link = $linkField";                              my $relJoinClause = "$otherName.${otherType}_link = $linkField";
2267                              $joinTable{$joinKey} = $relJoinClause;                              $joinTable{$joinKey} = $relJoinClause;
2268                              Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(4);                              Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(metadata => 4);
2269                          }                          }
2270                      }                      }
2271                  }                  }
# Line 2259  Line 2274 
2274                  # relationship can only be ambiguous with another recursive relationship,                  # relationship can only be ambiguous with another recursive relationship,
2275                  # and the incoming relationship from the outer loop is never recursive.                  # and the incoming relationship from the outer loop is never recursive.
2276                  for my $otherName (@bothList) {                  for my $otherName (@bothList) {
2277                      Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(3);                      Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(metadata => 4);
2278                      # Join from the left.                      # Join from the left.
2279                      $joinTable{"$relationshipName/$otherName"} =                      $joinTable{"$relationshipName/$otherName"} =
2280                          "$linkField = $otherName.from_link";                          "$linkField = $otherName.from_link";
# Line 2274  Line 2289 
2289          # rise to situations where we can't create the path we want; however, it is always          # rise to situations where we can't create the path we want; however, it is always
2290          # possible to get the same effect using multiple queries.          # possible to get the same effect using multiple queries.
2291          for my $relationshipName (@bothList) {          for my $relationshipName (@bothList) {
2292              Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(3);              Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(metadata => 4);
2293              # Join to the entity from each direction.              # Join to the entity from each direction.
2294              $joinTable{"$entityName/$relationshipName"} =              $joinTable{"$entityName/$relationshipName"} =
2295                  "$entityName.id = $relationshipName.from_link";                  "$entityName.id = $relationshipName.from_link";

Legend:
Removed from v.1.12  
changed lines
  Added in v.1.26

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3