[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.16, Tue Jun 28 23:51:06 2005 UTC revision 1.41, Wed Mar 29 20:51:37 2006 UTC
# Line 9  Line 9 
9      use DBObject;      use DBObject;
10      use Stats;      use Stats;
11      use Time::HiRes qw(gettimeofday);      use Time::HiRes qw(gettimeofday);
12        use FIG;
13    
14  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
15    
# Line 300  Line 301 
301  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.
302  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field
303  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation
304   #string is specified in the field definition.  # string is specified in the field definition. "avgLen" is the average byte length for estimating
305  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            dataGen => "StringGen('A')" },  # record sizes.
306                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           dataGen => "IntGen(0, 99999999)" },  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            avgLen =>   1, dataGen => "StringGen('A')" },
307                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          dataGen => "StringGen(IntGen(10,250))" },                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           avgLen =>   4, dataGen => "IntGen(0, 99999999)" },
308                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   dataGen => "StringGen(IntGen(80,1000))" },                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          avgLen => 100, dataGen => "StringGen(IntGen(10,250))" },
309                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           dataGen => "DateGen(-7, 7, IntGen(0,1400))" },                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" },
310                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           dataGen => "FloatGen(0.0, 100.0)" },                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" },
311                    boolean => { sqlType => 'SMALLINT',           maxLen => 1,            dataGen => "IntGen(0, 1)" },                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, dataGen => "FloatGen(0.0, 100.0)" },
312                      boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   1, dataGen => "IntGen(0, 1)" },
313                   'key-string' =>                   'key-string' =>
314                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           dataGen => "StringGen(IntGen(10,40))" },                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, dataGen => "StringGen(IntGen(10,40))" },
315                   'name-string' =>                   'name-string' =>
316                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           dataGen => "StringGen(IntGen(10,80))" },                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           avgLen =>  40, dataGen => "StringGen(IntGen(10,80))" },
317                   'medium-string' =>                   'medium-string' =>
318                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          dataGen => "StringGen(IntGen(10,160))" },                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          avgLen =>  40, dataGen => "StringGen(IntGen(10,160))" },
319                  );                  );
320    
321  # Table translating arities into natural language.  # Table translating arities into natural language.
# Line 369  Line 371 
371    
372  =head3 ShowMetaData  =head3 ShowMetaData
373    
374  C<< $database->ShowMetaData($fileName); >>  C<< $erdb->ShowMetaData($fileName); >>
375    
376  This method outputs a description of the database. This description can be used to help users create  This method outputs a description of the database. This description can be used to help users create
377  the data to be loaded into the relations.  the data to be loaded into the relations.
# Line 506  Line 508 
508          # Separate out the source, the target, and the join clause.          # Separate out the source, the target, and the join clause.
509          $joinKey =~ m!^([^/]+)/(.+)$!;          $joinKey =~ m!^([^/]+)/(.+)$!;
510          my ($sourceRelation, $targetRelation) = ($1, $2);          my ($sourceRelation, $targetRelation) = ($1, $2);
511          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(4);          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(Joins => 4);
512          my $source = $self->ComputeObjectSentence($sourceRelation);          my $source = $self->ComputeObjectSentence($sourceRelation);
513          my $target = $self->ComputeObjectSentence($targetRelation);          my $target = $self->ComputeObjectSentence($targetRelation);
514          my $clause = $joinTable->{$joinKey};          my $clause = $joinTable->{$joinKey};
# Line 524  Line 526 
526    
527  =head3 DumpMetaData  =head3 DumpMetaData
528    
529  C<< $database->DumpMetaData(); >>  C<< $erdb->DumpMetaData(); >>
530    
531  Return a dump of the metadata structure.  Return a dump of the metadata structure.
532    
# Line 539  Line 541 
541    
542  =head3 CreateTables  =head3 CreateTables
543    
544  C<< $datanase->CreateTables(); >>  C<< $erdb->CreateTables(); >>
545    
546  This method creates the tables for the database from the metadata structure loaded by the  This method creates the tables for the database from the metadata structure loaded by the
547  constructor. It is expected this function will only be used on rare occasions, when the  constructor. It is expected this function will only be used on rare occasions, when the
# Line 551  Line 553 
553  sub CreateTables {  sub CreateTables {
554      # Get the parameters.      # Get the parameters.
555      my ($self) = @_;      my ($self) = @_;
556      my $metadata = $self->{_metaData};      # Get the relation names.
557      my $dbh = $self->{_dbh};      my @relNames = $self->GetTableNames();
558      # Loop through the entities.      # Loop through the relations.
559      my $entityHash = $metadata->{Entities};      for my $relationName (@relNames) {
     for my $entityName (keys %{$entityHash}) {  
         my $entityData = $entityHash->{$entityName};  
         # Tell the user what we're doing.  
         Trace("Creating relations for entity $entityName.") if T(1);  
         # Loop through the entity's relations.  
         for my $relationName (keys %{$entityData->{Relations}}) {  
560              # Create a table for this relation.              # Create a table for this relation.
561              $self->CreateTable($relationName);              $self->CreateTable($relationName);
562              Trace("Relation $relationName created.") if T(1);          Trace("Relation $relationName created.") if T(2);
         }  
     }  
     # Loop through the relationships.  
     my $relationshipTable = $metadata->{Relationships};  
     for my $relationshipName (keys %{$metadata->{Relationships}}) {  
         # Create a table for this relationship.  
         Trace("Creating relationship $relationshipName.") if T(1);  
         $self->CreateTable($relationshipName);  
563      }      }
564  }  }
565    
566  =head3 CreateTable  =head3 CreateTable
567    
568  C<< $database->CreateTable($tableName, $indexFlag); >>  C<< $erdb->CreateTable($tableName, $indexFlag, $estimatedRows); >>
569    
570  Create the table for a relation and optionally create its indexes.  Create the table for a relation and optionally create its indexes.
571    
# Line 587  Line 575 
575    
576  Name of the relation (which will also be the table name).  Name of the relation (which will also be the table name).
577    
578  =item $indexFlag  =item indexFlag
579    
580  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,
581  L</CreateIndexes> must be called later to bring the indexes into existence.  L</CreateIndexes> must be called later to bring the indexes into existence.
582    
583    =item estimatedRows (optional)
584    
585    If specified, the estimated maximum number of rows for the relation. This
586    information allows the creation of tables using storage engines that are
587    faster but require size estimates, such as MyISAM.
588    
589  =back  =back
590    
591  =cut  =cut
592    
593  sub CreateTable {  sub CreateTable {
594      # Get the parameters.      # Get the parameters.
595      my ($self, $relationName, $indexFlag) = @_;      my ($self, $relationName, $indexFlag, $estimatedRows) = @_;
596      # Get the database handle.      # Get the database handle.
597      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
598      # Get the relation data and determine whether or not the relation is primary.      # Get the relation data and determine whether or not the relation is primary.
# Line 622  Line 616 
616      # Insure the table is not already there.      # Insure the table is not already there.
617      $dbh->drop_table(tbl => $relationName);      $dbh->drop_table(tbl => $relationName);
618      Trace("Table $relationName dropped.") if T(2);      Trace("Table $relationName dropped.") if T(2);
619        # If there are estimated rows, create an estimate so we can take advantage of
620        # faster DB technologies.
621        my $estimation = undef;
622        if ($estimatedRows) {
623            $estimation = [$self->EstimateRowSize($relationName), $estimatedRows];
624        }
625      # Create the table.      # Create the table.
626      Trace("Creating table $relationName: $fieldThing") if T(2);      Trace("Creating table $relationName: $fieldThing") if T(2);
627      $dbh->create_table(tbl => $relationName, flds => $fieldThing);      $dbh->create_table(tbl => $relationName, flds => $fieldThing, estimates => $estimation);
628      Trace("Relation $relationName created in database.") if T(2);      Trace("Relation $relationName created in database.") if T(2);
629      # If we want to build the indexes, we do it here.      # If we want to build the indexes, we do it here.
630      if ($indexFlag) {      if ($indexFlag) {
# Line 632  Line 632 
632      }      }
633  }  }
634    
635    =head3 VerifyFields
636    
637    C<< my $count = $erdb->VerifyFields($relName, \@fieldList); >>
638    
639    Run through the list of proposed field values, insuring that all the character fields are
640    below the maximum length. If any fields are too long, they will be truncated in place.
641    
642    =over 4
643    
644    =item relName
645    
646    Name of the relation for which the specified fields are destined.
647    
648    =item fieldList
649    
650    Reference to a list, in order, of the fields to be put into the relation.
651    
652    =item RETURN
653    
654    Returns the number of fields truncated.
655    
656    =back
657    
658    =cut
659    
660    sub VerifyFields {
661        # Get the parameters.
662        my ($self, $relName, $fieldList) = @_;
663        # Initialize the return value.
664        my $retVal = 0;
665        # Get the relation definition.
666        my $relData = $self->_FindRelation($relName);
667        # Get the list of field descriptors.
668        my $fieldTypes = $relData->{Fields};
669        my $fieldCount = scalar @{$fieldTypes};
670        # Loop through the two lists.
671        for (my $i = 0; $i < $fieldCount; $i++) {
672            # Get the type of the current field.
673            my $fieldType = $fieldTypes->[$i]->{type};
674            # If it's a character field, verify the length.
675            if ($fieldType =~ /string/) {
676                my $maxLen = $TypeTable{$fieldType}->{maxLen};
677                my $oldString = $fieldList->[$i];
678                if (length($oldString) > $maxLen) {
679                    # Here it's too big, so we truncate it.
680                    Trace("Truncating field $i in relation $relName to $maxLen characters from \"$oldString\".") if T(1);
681                    $fieldList->[$i] = substr $oldString, 0, $maxLen;
682                    $retVal++;
683                }
684            }
685        }
686        # Return the truncation count.
687        return $retVal;
688    }
689    
690  =head3 CreateIndex  =head3 CreateIndex
691    
692  C<< $database->CreateIndex($relationName); >>  C<< $erdb->CreateIndex($relationName); >>
693    
694  Create the indexes for a relation. If a table is being loaded from a large source file (as  Create the indexes for a relation. If a table is being loaded from a large source file (as
695  is the case in L</LoadTable>), it is sometimes best to create the indexes after the load.  is the case in L</LoadTable>), it is sometimes best to create the indexes after the load.
# Line 660  Line 715 
715          # Get the index's uniqueness flag.          # Get the index's uniqueness flag.
716          my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');          my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');
717          # Create the index.          # Create the index.
718          $dbh->create_index(idx => $indexName, tbl => $relationName, flds => $flds, unique => $unique);          my $rv = $dbh->create_index(idx => $indexName, tbl => $relationName,
719                                        flds => $flds, unique => $unique);
720            if ($rv) {
721          Trace("Index created: $indexName for $relationName ($flds)") if T(1);          Trace("Index created: $indexName for $relationName ($flds)") if T(1);
722            } else {
723                Confess("Error creating index $indexName for $relationName using ($flds): " . $dbh->error_message());
724            }
725      }      }
726  }  }
727    
728  =head3 LoadTables  =head3 LoadTables
729    
730  C<< my $stats = $database->LoadTables($directoryName, $rebuild); >>  C<< my $stats = $erdb->LoadTables($directoryName, $rebuild); >>
731    
732  This method will load the database tables from a directory. The tables must already have been created  This method will load the database tables from a directory. The tables must already have been created
733  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;
# Line 710  Line 770 
770      $directoryName =~ s!/\\$!!;      $directoryName =~ s!/\\$!!;
771      # Declare the return variable.      # Declare the return variable.
772      my $retVal = Stats->new();      my $retVal = Stats->new();
773      # Get the metadata structure.      # Get the relation names.
774      my $metaData = $self->{_metaData};      my @relNames = $self->GetTableNames();
775      # Loop through the entities.      for my $relationName (@relNames) {
     for my $entity (values %{$metaData->{Entities}}) {  
         # Loop through the entity's relations.  
         for my $relationName (keys %{$entity->{Relations}}) {  
776              # Try to load this relation.              # Try to load this relation.
777              my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);              my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);
778              # Accumulate the statistics.              # Accumulate the statistics.
779              $retVal->Accumulate($result);              $retVal->Accumulate($result);
780          }          }
     }  
     # Loop through the relationships.  
     for my $relationshipName (keys %{$metaData->{Relationships}}) {  
         # Try to load this relationship's relation.  
         my $result = $self->_LoadRelation($directoryName, $relationshipName, $rebuild);  
         # Accumulate the statistics.  
         $retVal->Accumulate($result);  
     }  
781      # Add the duration of the load to the statistical object.      # Add the duration of the load to the statistical object.
782      $retVal->Add('duration', gettimeofday - $startTime);      $retVal->Add('duration', gettimeofday - $startTime);
783      # Return the accumulated statistics.      # Return the accumulated statistics.
784      return $retVal;      return $retVal;
785  }  }
786    
787    
788  =head3 GetTableNames  =head3 GetTableNames
789    
790  C<< my @names = $database->GetTableNames; >>  C<< my @names = $erdb->GetTableNames; >>
791    
792  Return a list of the relations required to implement this database.  Return a list of the relations required to implement this database.
793    
# Line 754  Line 804 
804    
805  =head3 GetEntityTypes  =head3 GetEntityTypes
806    
807  C<< my @names = $database->GetEntityTypes; >>  C<< my @names = $erdb->GetEntityTypes; >>
808    
809  Return a list of the entity type names.  Return a list of the entity type names.
810    
# Line 769  Line 819 
819      return sort keys %{$entityList};      return sort keys %{$entityList};
820  }  }
821    
822    =head3 IsEntity
823    
824    C<< my $flag = $erdb->IsEntity($entityName); >>
825    
826    Return TRUE if the parameter is an entity name, else FALSE.
827    
828    =over 4
829    
830    =item entityName
831    
832    Object name to be tested.
833    
834    =item RETURN
835    
836    Returns TRUE if the specified string is an entity name, else FALSE.
837    
838    =back
839    
840    =cut
841    
842    sub IsEntity {
843        # Get the parameters.
844        my ($self, $entityName) = @_;
845        # Test to see if it's an entity.
846        return exists $self->{_metaData}->{Entities}->{$entityName};
847    }
848    
849  =head3 Get  =head3 Get
850    
851  C<< my $query = $database->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my $query = $erdb->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
852    
853  This method returns a query object for entities of a specified type using a specified filter.  This method returns a query object for entities of a specified type using a specified filter.
854  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each
# Line 779  Line 856 
856  following call requests all B<Genome> objects for the genus specified in the variable  following call requests all B<Genome> objects for the genus specified in the variable
857  $genus.  $genus.
858    
859  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", $genus); >>
860    
861  The WHERE clause contains a single question mark, so there is a single additional  The WHERE clause contains a single question mark, so there is a single additional
862  parameter representing the parameter value. It would also be possible to code  parameter representing the parameter value. It would also be possible to code
863    
864  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>
865    
866  however, this version of the call would generate a syntax error if there were any quote  however, this version of the call would generate a syntax error if there were any quote
867  characters inside the variable C<$genus>.  characters inside the variable C<$genus>.
# Line 796  Line 873 
873  It is possible to specify multiple entity and relationship names in order to retrieve more than  It is possible to specify multiple entity and relationship names in order to retrieve more than
874  one object's data at the same time, which allows highly complex joined queries. For example,  one object's data at the same time, which allows highly complex joined queries. For example,
875    
876  C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>
877    
878  If multiple names are specified, then the query processor will automatically determine a  If multiple names are specified, then the query processor will automatically determine a
879  join path between the entities and relationships. The algorithm used is very simplistic.  join path between the entities and relationships. The algorithm used is very simplistic.
880  In particular, you can't specify any entity or relationship more than once, and if a  In particular, if a relationship is recursive, the path is determined by the order in which
881  relationship is recursive, the path is determined by the order in which the entity  the entity and the relationship appear. For example, consider a recursive relationship
882  and the relationship appear. For example, consider a recursive relationship B<IsParentOf>  B<IsParentOf> which relates B<People> objects to other B<People> objects. If the join path is
 which relates B<People> objects to other B<People> objects. If the join path is  
883  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,
884  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.
885    
886    If an entity or relationship is mentioned twice, the name for the second occurrence will
887    be suffixed with C<2>, the third occurrence will be suffixed with C<3>, and so forth. So,
888    for example, if we have C<['Feature', 'HasContig', 'Contig', 'HasContig']>, then the
889    B<to-link> field of the first B<HasContig> is specified as C<HasContig(to-link)>, while
890    the B<to-link> field of the second B<HasContig> is specified as C<HasContig2(to-link)>.
891    
892  =over 4  =over 4
893    
894  =item objectNames  =item objectNames
# Line 829  Line 911 
911    
912  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>
913    
914    Note that the case is important. Only an uppercase "ORDER BY" with a single space will
915    be processed. The idea is to make it less likely to find the verb by accident.
916    
917  The rules for field references in a sort order are the same as those for field references in the  The rules for field references in a sort order are the same as those for field references in the
918  filter clause in general; however, odd things may happen if a sort field is from a secondary  filter clause in general; however, odd things may happen if a sort field is from a secondary
919  relation.  relation.
920    
921    Finally, you can limit the number of rows returned by adding a LIMIT clause. The LIMIT must
922    be the last thing in the filter clause, and it contains only the word "LIMIT" followed by
923    a positive number. So, for example
924    
925    C<< "Genome(genus) = ? ORDER BY Genome(species) LIMIT 10" >>
926    
927    will only return the first ten genomes for the specified genus. The ORDER BY clause is not
928    required. For example, to just get the first 10 genomes in the B<Genome> table, you could
929    use
930    
931    C<< "LIMIT 10" >>
932    
933  =item param1, param2, ..., paramN  =item param1, param2, ..., paramN
934    
935  Parameter values to be substituted into the filter clause.  Parameter values to be substituted into the filter clause.
# Line 848  Line 945 
945  sub Get {  sub Get {
946      # Get the parameters.      # Get the parameters.
947      my ($self, $objectNames, $filterClause, @params) = @_;      my ($self, $objectNames, $filterClause, @params) = @_;
948        # Adjust the list of object names to account for multiple occurrences of the
949        # same object. We start with a hash table keyed on object name that will
950        # return the object suffix. The first time an object is encountered it will
951        # not be found in the hash. The next time the hash will map the object name
952        # to 2, then 3, and so forth.
953        my %objectHash = ();
954        # This list will contain the object names as they are to appear in the
955        # FROM list.
956        my @fromList = ();
957        # This list contains the suffixed object name for each object. It is exactly
958        # parallel to the list in the $objectNames parameter.
959        my @mappedNameList = ();
960        # Finally, this hash translates from a mapped name to its original object name.
961        my %mappedNameHash = ();
962        # Now we create the lists. Note that for every single name we push something into
963        # @fromList and @mappedNameList. This insures that those two arrays are exactly
964        # parallel to $objectNames.
965        for my $objectName (@{$objectNames}) {
966            # Get the next suffix for this object.
967            my $suffix = $objectHash{$objectName};
968            if (! $suffix) {
969                # Here we are seeing the object for the first time. The object name
970                # is used as is.
971                push @mappedNameList, $objectName;
972                push @fromList, $objectName;
973                $mappedNameHash{$objectName} = $objectName;
974                # Denote the next suffix will be 2.
975                $objectHash{$objectName} = 2;
976            } else {
977                # Here we've seen the object before. We construct a new name using
978                # the suffix from the hash and update the hash.
979                my $mappedName = "$objectName$suffix";
980                $objectHash{$objectName} = $suffix + 1;
981                # The FROM list has the object name followed by the mapped name. This
982                # tells SQL it's still the same table, but we're using a different name
983                # for it to avoid confusion.
984                push @fromList, "$objectName $mappedName";
985                # The mapped-name list contains the real mapped name.
986                push @mappedNameList, $mappedName;
987                # Finally, enable us to get back from the mapped name to the object name.
988                $mappedNameHash{$mappedName} = $objectName;
989            }
990        }
991      # Construct the SELECT statement. The general pattern is      # Construct the SELECT statement. The general pattern is
992      #      #
993      # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN      # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN
994      #      #
995      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
996      my $command = "SELECT DISTINCT " . join('.*, ', @{$objectNames}) . ".* FROM " .      my $command = "SELECT DISTINCT " . join('.*, ', @mappedNameList) . ".* FROM " .
997                  join(', ', @{$objectNames});                  join(', ', @fromList);
998      # Check for a filter clause.      # Check for a filter clause.
999      if ($filterClause) {      if ($filterClause) {
1000          # Here we have one, so we convert its field names and add it to the query. First,          # Here we have one, so we convert its field names and add it to the query. First,
# Line 862  Line 1002 
1002          my $filterString = $filterClause;          my $filterString = $filterClause;
1003          # Next, we sort the object names by length. This helps protect us from finding          # Next, we sort the object names by length. This helps protect us from finding
1004          # object names inside other object names when we're doing our search and replace.          # object names inside other object names when we're doing our search and replace.
1005          my @sortedNames = sort { length($b) - length($a) } @{$objectNames};          my @sortedNames = sort { length($b) - length($a) } @mappedNameList;
1006          # We will also keep a list of conditions to add to the WHERE clause in order to link          # We will also keep a list of conditions to add to the WHERE clause in order to link
1007          # entities and relationships as well as primary relations to secondary ones.          # entities and relationships as well as primary relations to secondary ones.
1008          my @joinWhere = ();          my @joinWhere = ();
1009          # The final preparatory step is to create a hash table of relation names. The          # The final preparatory step is to create a hash table of relation names. The
1010          # table begins with the relation names already in the SELECT command.          # table begins with the relation names already in the SELECT command. We may
1011          my %fromNames = ();          # need to add relations later if there is filtering on a field in a secondary
1012          for my $objectName (@sortedNames) {          # relation. The secondary relations are the ones that contain multiply-
1013              $fromNames{$objectName} = 1;          # occurring or optional fields.
1014          }          my %fromNames = map { $_ => 1 } @sortedNames;
1015          # We are ready to begin. We loop through the object names, replacing each          # We are ready to begin. We loop through the object names, replacing each
1016          # object name's field references by the corresponding SQL field reference.          # object name's field references by the corresponding SQL field reference.
1017          # Along the way, if we find a secondary relation, we will need to add it          # Along the way, if we find a secondary relation, we will need to add it
1018          # to the FROM clause.          # to the FROM clause.
1019          for my $objectName (@sortedNames) {          for my $mappedName (@sortedNames) {
1020              # Get the length of the object name plus 2. This is the value we add to the              # Get the length of the object name plus 2. This is the value we add to the
1021              # size of the field name to determine the size of the field reference as a              # size of the field name to determine the size of the field reference as a
1022              # whole.              # whole.
1023              my $nameLength = 2 + length $objectName;              my $nameLength = 2 + length $mappedName;
1024                # Get the real object name for this mapped name.
1025                my $objectName = $mappedNameHash{$mappedName};
1026                Trace("Processing $mappedName for object $objectName.") if T(4);
1027              # Get the object's field list.              # Get the object's field list.
1028              my $fieldList = $self->_GetFieldTable($objectName);              my $fieldList = $self->GetFieldTable($objectName);
1029              # Find the field references for this object.              # Find the field references for this object.
1030              while ($filterString =~ m/$objectName\(([^)]*)\)/g) {              while ($filterString =~ m/$mappedName\(([^)]*)\)/g) {
1031                  # At this point, $1 contains the field name, and the current position                  # At this point, $1 contains the field name, and the current position
1032                  # is set immediately after the final parenthesis. We pull out the name of                  # is set immediately after the final parenthesis. We pull out the name of
1033                  # the field and the position and length of the field reference as a whole.                  # the field and the position and length of the field reference as a whole.
# Line 895  Line 1038 
1038                  if (!exists $fieldList->{$fieldName}) {                  if (!exists $fieldList->{$fieldName}) {
1039                      Confess("Field $fieldName not found for object $objectName.");                      Confess("Field $fieldName not found for object $objectName.");
1040                  } else {                  } else {
1041                        Trace("Processing $fieldName at position $pos.") if T(4);
1042                      # Get the field's relation.                      # Get the field's relation.
1043                      my $relationName = $fieldList->{$fieldName}->{relation};                      my $relationName = $fieldList->{$fieldName}->{relation};
1044                        # Now we have a secondary relation. We need to insure it matches the
1045                        # mapped name of the primary relation. First we peel off the suffix
1046                        # from the mapped name.
1047                        my $mappingSuffix = substr $mappedName, length($objectName);
1048                        # Put the mapping suffix onto the relation name to get the
1049                        # mapped relation name.
1050                        my $mappedRelationName = "$relationName$mappingSuffix";
1051                      # Insure the relation is in the FROM clause.                      # Insure the relation is in the FROM clause.
1052                      if (!exists $fromNames{$relationName}) {                      if (!exists $fromNames{$mappedRelationName}) {
1053                          # Add the relation to the FROM clause.                          # Add the relation to the FROM clause.
1054                            if ($mappedRelationName eq $relationName) {
1055                                # The name is un-mapped, so we add it without
1056                                # any frills.
1057                          $command .= ", $relationName";                          $command .= ", $relationName";
                         # Create its join sub-clause.  
1058                          push @joinWhere, "$objectName.id = $relationName.id";                          push @joinWhere, "$objectName.id = $relationName.id";
1059                          # Denote we have it available for future fields.                          } else {
1060                          $fromNames{$relationName} = 1;                              # Here we have a mapping situation.
1061                                $command .= ", $relationName $mappedRelationName";
1062                                push @joinWhere, "$mappedRelationName.id = $mappedName.id";
1063                            }
1064                            # Denote we have this relation available for future fields.
1065                            $fromNames{$mappedRelationName} = 1;
1066                      }                      }
1067                      # Form an SQL field reference from the relation name and the field name.                      # Form an SQL field reference from the relation name and the field name.
1068                      my $sqlReference = "$relationName." . _FixName($fieldName);                      my $sqlReference = "$mappedRelationName." . _FixName($fieldName);
1069                      # Put it into the filter string in place of the old value.                      # Put it into the filter string in place of the old value.
1070                      substr($filterString, $pos, $len) = $sqlReference;                      substr($filterString, $pos, $len) = $sqlReference;
1071                      # Reposition the search.                      # Reposition the search.
# Line 919  Line 1077 
1077          # is more than one object in the object list. We start with the first object and          # is more than one object in the object list. We start with the first object and
1078          # run through the objects after it. Note also that we make a safety copy of the          # run through the objects after it. Note also that we make a safety copy of the
1079          # list before running through it.          # list before running through it.
1080          my @objectList = @{$objectNames};          my @mappedObjectList = @mappedNameList;
1081          my $lastObject = shift @objectList;          my $lastMappedObject = shift @mappedObjectList;
1082          # Get the join table.          # Get the join table.
1083          my $joinTable = $self->{_metaData}->{Joins};          my $joinTable = $self->{_metaData}->{Joins};
1084          # Loop through the object list.          # Loop through the object list.
1085          for my $thisObject (@objectList) {          for my $thisMappedObject (@mappedObjectList) {
1086              # Look for a join.              # Look for a join using the real object names.
1087                my $lastObject = $mappedNameHash{$lastMappedObject};
1088                my $thisObject = $mappedNameHash{$thisMappedObject};
1089              my $joinKey = "$lastObject/$thisObject";              my $joinKey = "$lastObject/$thisObject";
1090              if (!exists $joinTable->{$joinKey}) {              if (!exists $joinTable->{$joinKey}) {
1091                  # Here there's no join, so we throw an error.                  # Here there's no join, so we throw an error.
1092                  Confess("No join exists to connect from $lastObject to $thisObject.");                  Confess("No join exists to connect from $lastMappedObject to $thisMappedObject.");
1093              } else {              } else {
1094                  # Get the join clause and add it to the WHERE list.                  # Get the join clause.
1095                  push @joinWhere, $joinTable->{$joinKey};                  my $unMappedJoin = $joinTable->{$joinKey};
1096                    # Fix the names.
1097                    $unMappedJoin =~ s/$lastObject/$lastMappedObject/;
1098                    $unMappedJoin =~ s/$thisObject/$thisMappedObject/;
1099                    push @joinWhere, $unMappedJoin;
1100                  # Save this object as the last object for the next iteration.                  # Save this object as the last object for the next iteration.
1101                  $lastObject = $thisObject;                  $lastMappedObject = $thisMappedObject;
1102              }              }
1103          }          }
1104          # Now we need to handle the whole ORDER BY thing. We'll put the order by clause          # Now we need to handle the whole ORDER BY / LIMIT thing. The important part
1105          # in the following variable.          # here is we want the filter clause to be empty if there's no WHERE filter.
1106            # We'll put the ORDER BY / LIMIT clauses in the following variable.
1107          my $orderClause = "";          my $orderClause = "";
1108          # Locate the ORDER BY verb (if any).          # Locate the ORDER BY or LIMIT verbs (if any). We use a non-greedy
1109          if ($filterString =~ m/^(.*)ORDER BY/g) {          # operator so that we find the first occurrence of either verb.
1110              # Here we have an ORDER BY verb. Split it off of the filter string.          if ($filterString =~ m/^(.*?)\s*(ORDER BY|LIMIT)/g) {
1111                # Here we have an ORDER BY or LIMIT verb. Split it off of the filter string.
1112              my $pos = pos $filterString;              my $pos = pos $filterString;
1113              $orderClause = substr($filterString, $pos);              $orderClause = $2 . substr($filterString, $pos);
1114              $filterString = $1;              $filterString = $1;
1115          }          }
1116          # Add the filter and the join clauses (if any) to the SELECT command.          # Add the filter and the join clauses (if any) to the SELECT command.
1117          if ($filterString) {          if ($filterString) {
1118                Trace("Filter string is \"$filterString\".") if T(4);
1119              push @joinWhere, "($filterString)";              push @joinWhere, "($filterString)";
1120          }          }
1121          if (@joinWhere) {          if (@joinWhere) {
1122              $command .= " WHERE " . join(' AND ', @joinWhere);              $command .= " WHERE " . join(' AND ', @joinWhere);
1123          }          }
1124          # Add the sort clause (if any) to the SELECT command.          # Add the sort or limit clause (if any) to the SELECT command.
1125          if ($orderClause) {          if ($orderClause) {
1126              $command .= " ORDER BY $orderClause";              $command .= " $orderClause";
1127          }          }
1128      }      }
1129      Trace("SQL query: $command") if T(2);      Trace("SQL query: $command") if T(SQL => 3);
1130      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(3) && (@params > 0));      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(SQL => 4) && (@params > 0));
1131      my $sth = $dbh->prepare_command($command);      my $sth = $dbh->prepare_command($command);
1132      # Execute it with the parameters bound in.      # Execute it with the parameters bound in.
1133      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());
1134        # Now we create the relation map, which enables DBQuery to determine the order, name
1135        # and mapped name for each object in the query.
1136        my @relationMap = ();
1137        for my $mappedName (@mappedNameList) {
1138            push @relationMap, [$mappedName, $mappedNameHash{$mappedName}];
1139        }
1140      # Return the statement object.      # Return the statement object.
1141      my $retVal = DBQuery::_new($self, $sth, @{$objectNames});      my $retVal = DBQuery::_new($self, $sth, \@relationMap);
1142        return $retVal;
1143    }
1144    
1145    =head3 Delete
1146    
1147    C<< my $stats = $erdb->Delete($entityName, $objectID); >>
1148    
1149    Delete an entity instance from the database. The instance is deleted along with all entity and
1150    relationship instances dependent on it. The idea of dependence here is recursive. An object is
1151    always dependent on itself. An object is dependent if it is a 1-to-many or many-to-many
1152    relationship connected to a dependent entity or the "to" entity connected to a 1-to-many
1153    dependent relationship.
1154    
1155    =over 4
1156    
1157    =item entityName
1158    
1159    Name of the entity type for the instance being deleted.
1160    
1161    =item objectID
1162    
1163    ID of the entity instance to be deleted. If the ID contains a wild card character (C<%>),
1164    then it is presumed to by a LIKE pattern.
1165    
1166    =item testFlag
1167    
1168    If TRUE, the delete statements will be traced without being executed.
1169    
1170    =item RETURN
1171    
1172    Returns a statistics object indicating how many records of each particular table were
1173    deleted.
1174    
1175    =back
1176    
1177    =cut
1178    #: Return Type $%;
1179    sub Delete {
1180        # Get the parameters.
1181        my ($self, $entityName, $objectID, $testFlag) = @_;
1182        # Declare the return variable.
1183        my $retVal = Stats->new();
1184        # Get the DBKernel object.
1185        my $db = $self->{_dbh};
1186        # We're going to generate all the paths branching out from the starting entity. One of
1187        # the things we have to be careful about is preventing loops. We'll use a hash to
1188        # determine if we've hit a loop.
1189        my %alreadyFound = ();
1190        # These next lists will serve as our result stack. We start by pushing object lists onto
1191        # the stack, and then popping them off to do the deletes. This means the deletes will
1192        # start with the longer paths before getting to the shorter ones. That, in turn, makes
1193        # sure we don't delete records that might be needed to forge relationships back to the
1194        # original item. We have two lists-- one for TO-relationships, and one for
1195        # FROM-relationships and entities.
1196        my @fromPathList = ();
1197        my @toPathList = ();
1198        # This final hash is used to remember what work still needs to be done. We push paths
1199        # onto the list, then pop them off to extend the paths. We prime it with the starting
1200        # point. Note that we will work hard to insure that the last item on a path in the
1201        # TODO list is always an entity.
1202        my @todoList = ([$entityName]);
1203        while (@todoList) {
1204            # Get the current path.
1205            my $current = pop @todoList;
1206            # Copy it into a list.
1207            my @stackedPath = @{$current};
1208            # Pull off the last item on the path. It will always be an entity.
1209            my $entityName = pop @stackedPath;
1210            # Add it to the alreadyFound list.
1211            $alreadyFound{$entityName} = 1;
1212            # Get the entity data.
1213            my $entityData = $self->_GetStructure($entityName);
1214            # The first task is to loop through the entity's relation. A DELETE command will
1215            # be needed for each of them.
1216            my $relations = $entityData->{Relations};
1217            for my $relation (keys %{$relations}) {
1218                my @augmentedList = (@stackedPath, $relation);
1219                push @fromPathList, \@augmentedList;
1220            }
1221            # Now we need to look for relationships connected to this entity.
1222            my $relationshipList = $self->{_metaData}->{Relationships};
1223            for my $relationshipName (keys %{$relationshipList}) {
1224                my $relationship = $relationshipList->{$relationshipName};
1225                # Check the FROM field. We're only interested if it's us.
1226                if ($relationship->{from} eq $entityName) {
1227                    # Add the path to this relationship.
1228                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1229                    push @fromPathList, \@augmentedList;
1230                    # Check the arity. If it's MM we're done. If it's 1M
1231                    # and the target hasn't been seen yet, we want to
1232                    # stack the entity for future processing.
1233                    if ($relationship->{arity} eq '1M') {
1234                        my $toEntity = $relationship->{to};
1235                        if (! exists $alreadyFound{$toEntity}) {
1236                            # Here we have a new entity that's dependent on
1237                            # the current entity, so we need to stack it.
1238                            my @stackList = (@augmentedList, $toEntity);
1239                            push @fromPathList, \@stackList;
1240                        } else {
1241                            Trace("$toEntity ignored because it occurred previously.") if T(4);
1242                        }
1243                    }
1244                }
1245                # Now check the TO field. In this case only the relationship needs
1246                # deletion.
1247                if ($relationship->{to} eq $entityName) {
1248                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1249                    push @toPathList, \@augmentedList;
1250                }
1251            }
1252        }
1253        # Create the first qualifier for the WHERE clause. This selects the
1254        # keys of the primary entity records to be deleted. When we're deleting
1255        # from a dependent table, we construct a join page from the first qualifier
1256        # to the table containing the dependent records to delete.
1257        my $qualifier = ($objectID =~ /%/ ? "LIKE ?" : "= ?");
1258        # We need to make two passes. The first is through the to-list, and
1259        # the second through the from-list. The from-list is second because
1260        # the to-list may need to pass through some of the entities the
1261        # from-list would delete.
1262        my %stackList = ( from_link => \@fromPathList, to_link => \@toPathList );
1263        # Now it's time to do the deletes. We do it in two passes.
1264        for my $keyName ('to_link', 'from_link') {
1265            # Get the list for this key.
1266            my @pathList = @{$stackList{$keyName}};
1267            Trace(scalar(@pathList) . " entries in path list for $keyName.") if T(3);
1268            # Loop through this list.
1269            while (my $path = pop @pathList) {
1270                # Get the table whose rows are to be deleted.
1271                my @pathTables = @{$path};
1272                # Start the DELETE statement. We need to call DBKernel because the
1273                # syntax of a DELETE-USING varies among DBMSs.
1274                my $target = $pathTables[$#pathTables];
1275                my $stmt = $db->SetUsing(@pathTables);
1276                # Now start the WHERE. The first thing is the ID field from the starting table. That
1277                # starting table will either be the entity relation or one of the entity's
1278                # sub-relations.
1279                $stmt .= " WHERE $pathTables[0].id $qualifier";
1280                # Now we run through the remaining entities in the path, connecting them up.
1281                for (my $i = 1; $i <= $#pathTables; $i += 2) {
1282                    # Connect the current relationship to the preceding entity.
1283                    my ($entity, $rel) = @pathTables[$i-1,$i];
1284                    # The style of connection depends on the direction of the relationship.
1285                    $stmt .= " AND $entity.id = $rel.$keyName";
1286                    if ($i + 1 <= $#pathTables) {
1287                        # Here there's a next entity, so connect that to the relationship's
1288                        # to-link.
1289                        my $entity2 = $pathTables[$i+1];
1290                        $stmt .= " AND $rel.to_link = $entity2.id";
1291                    }
1292                }
1293                # Now we have our desired DELETE statement.
1294                if ($testFlag) {
1295                    # Here the user wants to trace without executing.
1296                    Trace($stmt) if T(0);
1297                } else {
1298                    # Here we can delete. Note that the SQL method dies with a confessing
1299                    # if an error occurs, so we just go ahead and do it.
1300                    Trace("Executing delete from $target using '$objectID'.") if T(3);
1301                    my $rv = $db->SQL($stmt, 0, $objectID);
1302                    # Accumulate the statistics for this delete. The only rows deleted
1303                    # are from the target table, so we use its name to record the
1304                    # statistic.
1305                    $retVal->Add($target, $rv);
1306                }
1307            }
1308        }
1309        # Return the result.
1310      return $retVal;      return $retVal;
1311  }  }
1312    
1313  =head3 GetList  =head3 GetList
1314    
1315  C<< my @dbObjects = $database->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
1316    
1317  Return a list of object descriptors for the specified objects as determined by the  Return a list of object descriptors for the specified objects as determined by the
1318  specified filter clause.  specified filter clause.
# Line 1034  Line 1375 
1375    
1376  =head3 ComputeObjectSentence  =head3 ComputeObjectSentence
1377    
1378  C<< my $sentence = $database->ComputeObjectSentence($objectName); >>  C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >>
1379    
1380  Check an object name, and if it is a relationship convert it to a relationship sentence.  Check an object name, and if it is a relationship convert it to a relationship sentence.
1381    
# Line 1069  Line 1410 
1410    
1411  =head3 DumpRelations  =head3 DumpRelations
1412    
1413  C<< $database->DumpRelations($outputDirectory); >>  C<< $erdb->DumpRelations($outputDirectory); >>
1414    
1415  Write the contents of all the relations to tab-delimited files in the specified directory.  Write the contents of all the relations to tab-delimited files in the specified directory.
1416  Each file will have the same name as the relation dumped, with an extension of DTX.  Each file will have the same name as the relation dumped, with an extension of DTX.
# Line 1111  Line 1452 
1452    
1453  =head3 InsertObject  =head3 InsertObject
1454    
1455  C<< my $ok = $database->InsertObject($objectType, \%fieldHash); >>  C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >>
1456    
1457  Insert an object into the database. The object is defined by a type name and then a hash  Insert an object into the database. The object is defined by a type name and then a hash
1458  of field names to values. Field values in the primary relation are represented by scalars.  of field names to values. Field values in the primary relation are represented by scalars.
# Line 1120  Line 1461 
1461  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases
1462  C<ZP_00210270.1> and C<gi|46206278>.  C<ZP_00210270.1> and C<gi|46206278>.
1463    
1464  C<< $database->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>  C<< $erdb->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>
1465    
1466  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and
1467  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.
1468    
1469  C<< $database->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>  C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>
1470    
1471  =over 4  =over 4
1472    
# Line 1250  Line 1591 
1591    
1592  =head3 LoadTable  =head3 LoadTable
1593    
1594  C<< my %results = $database->LoadTable($fileName, $relationName, $truncateFlag); >>  C<< my %results = $erdb->LoadTable($fileName, $relationName, $truncateFlag); >>
1595    
1596  Load data from a tab-delimited file into a specified table, optionally re-creating the table  Load data from a tab-delimited file into a specified table, optionally re-creating the table
1597  first.  first.
# Line 1271  Line 1612 
1612    
1613  =item RETURN  =item RETURN
1614    
1615  Returns a statistical object containing the number of records read and a list of  Returns a statistical object containing a list of the error messages.
 the error messages.  
1616    
1617  =back  =back
1618    
# Line 1286  Line 1626 
1626      Trace("Loading table $relationName from $fileName") if T(2);      Trace("Loading table $relationName from $fileName") if T(2);
1627      # Get the database handle.      # Get the database handle.
1628      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
1629        # Get the input file size.
1630        my $fileSize = -s $fileName;
1631      # Get the relation data.      # Get the relation data.
1632      my $relation = $self->_FindRelation($relationName);      my $relation = $self->_FindRelation($relationName);
1633      # Check the truncation flag.      # Check the truncation flag.
1634      if ($truncateFlag) {      if ($truncateFlag) {
1635          Trace("Creating table $relationName") if T(2);          Trace("Creating table $relationName") if T(2);
1636            # Compute the row count estimate. We take the size of the load file,
1637            # divide it by the estimated row size, and then multiply by 1.5 to
1638            # leave extra room. We postulate a minimum row count of 1000 to
1639            # prevent problems with incoming empty load files.
1640            my $rowSize = $self->EstimateRowSize($relationName);
1641            my $estimate = FIG::max($fileSize * 1.5 / $rowSize, 1000);
1642          # Re-create the table without its index.          # Re-create the table without its index.
1643          $self->CreateTable($relationName, 0);          $self->CreateTable($relationName, 0, $estimate);
1644          # If this is a pre-index DBMS, create the index here.          # If this is a pre-index DBMS, create the index here.
1645          if ($dbh->{_preIndex}) {          if ($dbh->{_preIndex}) {
1646              eval {              eval {
# Line 1303  Line 1651 
1651              }              }
1652          }          }
1653      }      }
     # Determine whether or not this is a primary relation. Primary relations have an extra  
     # field indicating whether or not a given object is new or was loaded from the flat files.  
     my $primary = $self->_IsPrimary($relationName);  
     # Get the number of fields in this relation.  
     my @fieldList = @{$relation->{Fields}};  
     my $fieldCount = @fieldList;  
     # Start a database transaction.  
     $dbh->begin_tran;  
     # Open the relation file. We need to create a cleaned-up copy before loading.  
     open TABLEIN, '<', $fileName;  
     my $tempName = "$fileName.tbl";  
     open TABLEOUT, '>', $tempName;  
     my $inputCount = 0;  
     # Loop through the file.  
     while (<TABLEIN>) {  
         $inputCount++;  
         # Chop off the new-line character.  
         my $record = Tracer::Strip($_);  
         # Only proceed if the record is non-blank.  
         if ($record) {  
             # Escape all the backslashes found in the line.  
             $record =~ s/\\/\\\\/g;  
             # Insure the number of fields is correct.  
             my @fields = split /\t/, $record;  
             while (@fields > $fieldCount) {  
                 my $extraField = $fields[$#fields];  
                 delete $fields[$#fields];  
                 if ($extraField) {  
                     Trace("Nonblank extra field value \"$extraField\" deleted from record $inputCount of $fileName.") if T(1);  
                 }  
             }  
             while (@fields < $fieldCount) {  
                 push @fields, "";  
             }  
             # If this is a primary relation, add a 0 for the new-record flag (indicating that  
             # this record is not new, but part of the original load).  
             if ($primary) {  
                 push @fields, "0";  
             }  
             # Write the record.  
             $record = join "\t", @fields;  
             print TABLEOUT "$record\n";  
             # Count the record written.  
             my $count = $retVal->Add('records');  
             my $len = length $record;  
             Trace("Record $count written with $len characters.") if T(4);  
         } else {  
             # Here we have a blank record.  
             $retVal->Add('skipped');  
         }  
     }  
     # Close the files.  
     close TABLEIN;  
     close TABLEOUT;  
     Trace("Temporary file $tempName created.") if T(2);  
1654      # Load the table.      # Load the table.
1655      my $rv;      my $rv;
1656      eval {      eval {
1657          $rv = $dbh->load_table(file => $tempName, tbl => $relationName);          $rv = $dbh->load_table(file => $fileName, tbl => $relationName);
1658      };      };
1659      if (!defined $rv) {      if (!defined $rv) {
1660          $retVal->AddMessage($@) if ($@);          $retVal->AddMessage($@) if ($@);
1661          $retVal->AddMessage("Table load failed for $relationName using $tempName.");          $retVal->AddMessage("Table load failed for $relationName using $fileName.");
1662          Trace("Table load failed for $relationName.") if T(1);          Trace("Table load failed for $relationName.") if T(1);
1663      } else {      } else {
1664          # Here we successfully loaded the table. Trace the number of records loaded.          # Here we successfully loaded the table.
1665          Trace("$retVal->{records} records read for $relationName.") if T(2);          $retVal->Add("tables");
1666            my $size = -s $fileName;
1667            Trace("$size bytes loaded into $relationName.") if T(2);
1668          # If we're rebuilding, we need to create the table indexes.          # If we're rebuilding, we need to create the table indexes.
1669          if ($truncateFlag && ! $dbh->{_preIndex}) {          if ($truncateFlag && ! $dbh->{_preIndex}) {
1670              eval {              eval {
# Line 1379  Line 1674 
1674                  $retVal->AddMessage($@);                  $retVal->AddMessage($@);
1675              }              }
1676          }          }
         # Analyze the table to help optimize tables.  
1677      }      }
1678      # Commit the database changes.      # Analyze the table to improve performance.
     $dbh->commit_tran;  
1679      $dbh->vacuum_it($relationName);      $dbh->vacuum_it($relationName);
     # Delete the temporary file.  
     unlink $tempName;  
1680      # Return the statistics.      # Return the statistics.
1681      return $retVal;      return $retVal;
1682  }  }
1683    
1684  =head3 GenerateEntity  =head3 GenerateEntity
1685    
1686  C<< my $fieldHash = $database->GenerateEntity($id, $type, \%values); >>  C<< my $fieldHash = $erdb->GenerateEntity($id, $type, \%values); >>
1687    
1688  Generate the data for a new entity instance. This method creates a field hash suitable for  Generate the data for a new entity instance. This method creates a field hash suitable for
1689  passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest  passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest
# Line 1450  Line 1741 
1741    
1742  =head3 GetEntity  =head3 GetEntity
1743    
1744  C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >>  C<< my $entityObject = $erdb->GetEntity($entityType, $ID); >>
1745    
1746  Return an object describing the entity instance with a specified ID.  Return an object describing the entity instance with a specified ID.
1747    
# Line 1486  Line 1777 
1777    
1778  =head3 GetEntityValues  =head3 GetEntityValues
1779    
1780  C<< my @values = GetEntityValues($entityType, $ID, \@fields); >>  C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >>
1781    
1782  Return a list of values from a specified entity instance.  Return a list of values from a specified entity instance.
1783    
# Line 1529  Line 1820 
1820    
1821  =head3 GetAll  =head3 GetAll
1822    
1823  C<< my @list = $sprout->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>  C<< my @list = $erdb->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>
1824    
1825  Return a list of values taken from the objects returned by a query. The first three  Return a list of values taken from the objects returned by a query. The first three
1826  parameters correspond to the parameters of the L</Get> method. The final parameter is  parameters correspond to the parameters of the L</Get> method. The final parameter is
# Line 1545  Line 1836 
1836  spreadsheet cell, and each feature will be represented by a list containing the  spreadsheet cell, and each feature will be represented by a list containing the
1837  feature ID followed by all of its aliases.  feature ID followed by all of its aliases.
1838    
1839  C<< $query = $sprout->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>  C<< $query = $erdb->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>
1840    
1841  =over 4  =over 4
1842    
# Line 1594  Line 1885 
1885      } else {      } else {
1886          push @parmList, $parameterList;          push @parmList, $parameterList;
1887      }      }
     # Create the query.  
     my $query = $self->Get($objectNames, $filterClause, @parmList);  
     # Set up a counter of the number of records read.  
     my $fetched = 0;  
1888      # Insure the counter has a value.      # Insure the counter has a value.
1889      if (!defined $count) {      if (!defined $count) {
1890          $count = 0;          $count = 0;
1891      }      }
1892        # Add the row limit to the filter clause.
1893        if ($count > 0) {
1894            $filterClause .= " LIMIT $count";
1895        }
1896        # Create the query.
1897        my $query = $self->Get($objectNames, $filterClause, @parmList);
1898        # Set up a counter of the number of records read.
1899        my $fetched = 0;
1900      # Loop through the records returned, extracting the fields. Note that if the      # Loop through the records returned, extracting the fields. Note that if the
1901      # counter is non-zero, we stop when the number of records read hits the count.      # counter is non-zero, we stop when the number of records read hits the count.
1902      my @retVal = ();      my @retVal = ();
# Line 1614  Line 1909 
1909      return @retVal;      return @retVal;
1910  }  }
1911    
1912    =head3 EstimateRowSize
1913    
1914    C<< my $rowSize = $erdb->EstimateRowSize($relName); >>
1915    
1916    Estimate the row size of the specified relation. The estimated row size is computed by adding
1917    up the average length for each data type.
1918    
1919    =over 4
1920    
1921    =item relName
1922    
1923    Name of the relation whose estimated row size is desired.
1924    
1925    =item RETURN
1926    
1927    Returns an estimate of the row size for the specified relation.
1928    
1929    =back
1930    
1931    =cut
1932    #: Return Type $;
1933    sub EstimateRowSize {
1934        # Get the parameters.
1935        my ($self, $relName) = @_;
1936        # Declare the return variable.
1937        my $retVal = 0;
1938        # Find the relation descriptor.
1939        my $relation = $self->_FindRelation($relName);
1940        # Get the list of fields.
1941        for my $fieldData (@{$relation->{Fields}}) {
1942            # Get the field type and add its length.
1943            my $fieldLen = $TypeTable{$fieldData->{type}}->{avgLen};
1944            $retVal += $fieldLen;
1945        }
1946        # Return the result.
1947        return $retVal;
1948    }
1949    
1950    =head3 GetFieldTable
1951    
1952    C<< my $fieldHash = $self->GetFieldTable($objectnName); >>
1953    
1954    Get the field structure for a specified entity or relationship.
1955    
1956    =over 4
1957    
1958    =item objectName
1959    
1960    Name of the desired entity or relationship.
1961    
1962    =item RETURN
1963    
1964    The table containing the field descriptors for the specified object.
1965    
1966    =back
1967    
1968    =cut
1969    
1970    sub GetFieldTable {
1971        # Get the parameters.
1972        my ($self, $objectName) = @_;
1973        # Get the descriptor from the metadata.
1974        my $objectData = $self->_GetStructure($objectName);
1975        # Return the object's field table.
1976        return $objectData->{Fields};
1977    }
1978    
1979    =head3 GetUsefulCrossValues
1980    
1981    C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >>
1982    
1983    Return a list of the useful attributes that would be returned by a B<Cross> call
1984    from an entity of the source entity type through the specified relationship. This
1985    means it will return the fields of the target entity type and the intersection data
1986    fields in the relationship. Only primary table fields are returned. In other words,
1987    the field names returned will be for fields where there is always one and only one
1988    value.
1989    
1990    =over 4
1991    
1992    =item sourceEntity
1993    
1994    Name of the entity from which the relationship crossing will start.
1995    
1996    =item relationship
1997    
1998    Name of the relationship being crossed.
1999    
2000    =item RETURN
2001    
2002    Returns a list of field names in Sprout field format (I<objectName>C<(>I<fieldName>C<)>.
2003    
2004    =back
2005    
2006    =cut
2007    #: Return Type @;
2008    sub GetUsefulCrossValues {
2009        # Get the parameters.
2010        my ($self, $sourceEntity, $relationship) = @_;
2011        # Declare the return variable.
2012        my @retVal = ();
2013        # Determine the target entity for the relationship. This is whichever entity is not
2014        # the source entity. So, if the source entity is the FROM, we'll get the name of
2015        # the TO, and vice versa.
2016        my $relStructure = $self->_GetStructure($relationship);
2017        my $targetEntityType = ($relStructure->{from} eq $sourceEntity ? "to" : "from");
2018        my $targetEntity = $relStructure->{$targetEntityType};
2019        # Get the field table for the entity.
2020        my $entityFields = $self->GetFieldTable($targetEntity);
2021        # The field table is a hash. The hash key is the field name. The hash value is a structure.
2022        # For the entity fields, the key aspect of the target structure is that the {relation} value
2023        # must match the entity name.
2024        my @fieldList = map { "$targetEntity($_)" } grep { $entityFields->{$_}->{relation} eq $targetEntity }
2025                            keys %{$entityFields};
2026        # Push the fields found onto the return variable.
2027        push @retVal, sort @fieldList;
2028        # Get the field table for the relationship.
2029        my $relationshipFields = $self->GetFieldTable($relationship);
2030        # Here we have a different rule. We want all the fields other than "from-link" and "to-link".
2031        # This may end up being an empty set.
2032        my @fieldList2 = map { "$relationship($_)" } grep { $_ ne "from-link" && $_ ne "to-link" }
2033                            keys %{$relationshipFields};
2034        # Push these onto the return list.
2035        push @retVal, sort @fieldList2;
2036        # Return the result.
2037        return @retVal;
2038    }
2039    
2040  =head2 Internal Utility Methods  =head2 Internal Utility Methods
2041    
2042  =head3 GetLoadStats  =head3 GetLoadStats
# Line 1625  Line 2048 
2048  =cut  =cut
2049    
2050  sub _GetLoadStats {  sub _GetLoadStats {
2051      return Stats->new('records');      return Stats->new();
2052  }  }
2053    
2054  =head3 GenerateFields  =head3 GenerateFields
# Line 1820  Line 2243 
2243      return $objectData->{Relations};      return $objectData->{Relations};
2244  }  }
2245    
 =head3 GetFieldTable  
   
 Get the field structure for a specified entity or relationship.  
   
 This is an instance method.  
   
 =over 4  
   
 =item objectName  
   
 Name of the desired entity or relationship.  
   
 =item RETURN  
   
 The table containing the field descriptors for the specified object.  
   
 =back  
   
 =cut  
   
 sub _GetFieldTable {  
     # Get the parameters.  
     my ($self, $objectName) = @_;  
     # Get the descriptor from the metadata.  
     my $objectData = $self->_GetStructure($objectName);  
     # Return the object's field table.  
     return $objectData->{Fields};  
 }  
   
2246  =head3 ValidateFieldNames  =head3 ValidateFieldNames
2247    
2248  Determine whether or not the field names are valid. A description of the problems with the names  Determine whether or not the field names are valid. A description of the problems with the names
# Line 2196  Line 2590 
2590          my @fromList = ();          my @fromList = ();
2591          my @toList = ();          my @toList = ();
2592          my @bothList = ();          my @bothList = ();
2593          Trace("Join table build for $entityName.") if T(3);          Trace("Join table build for $entityName.") if T(metadata => 4);
2594          for my $relationshipName (keys %{$relationshipList}) {          for my $relationshipName (keys %{$relationshipList}) {
2595              my $relationship = $relationshipList->{$relationshipName};              my $relationship = $relationshipList->{$relationshipName};
2596              # Determine if this relationship has our entity in one of its link fields.              # Determine if this relationship has our entity in one of its link fields.
2597              my $fromEntity = $relationship->{from};              my $fromEntity = $relationship->{from};
2598              my $toEntity = $relationship->{to};              my $toEntity = $relationship->{to};
2599              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(3);              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(Joins => 4);
2600              if ($fromEntity eq $entityName) {              if ($fromEntity eq $entityName) {
2601                  if ($toEntity eq $entityName) {                  if ($toEntity eq $entityName) {
2602                      # Here the relationship is recursive.                      # Here the relationship is recursive.
2603                      push @bothList, $relationshipName;                      push @bothList, $relationshipName;
2604                      Trace("Relationship $relationshipName put in both-list.") if T(3);                      Trace("Relationship $relationshipName put in both-list.") if T(metadata => 4);
2605                  } else {                  } else {
2606                      # Here the relationship comes from the entity.                      # Here the relationship comes from the entity.
2607                      push @fromList, $relationshipName;                      push @fromList, $relationshipName;
2608                      Trace("Relationship $relationshipName put in from-list.") if T(3);                      Trace("Relationship $relationshipName put in from-list.") if T(metadata => 4);
2609                  }                  }
2610              } elsif ($toEntity eq $entityName) {              } elsif ($toEntity eq $entityName) {
2611                  # Here the relationship goes to the entity.                  # Here the relationship goes to the entity.
2612                  push @toList, $relationshipName;                  push @toList, $relationshipName;
2613                  Trace("Relationship $relationshipName put in to-list.") if T(3);                  Trace("Relationship $relationshipName put in to-list.") if T(metadata => 4);
2614              }              }
2615          }          }
2616          # Create the nonrecursive joins. Note that we build two hashes for running          # Create the nonrecursive joins. Note that we build two hashes for running
# Line 2232  Line 2626 
2626                  # Create joins between the entity and this relationship.                  # Create joins between the entity and this relationship.
2627                  my $linkField = "$relationshipName.${linkType}_link";                  my $linkField = "$relationshipName.${linkType}_link";
2628                  my $joinClause = "$entityName.id = $linkField";                  my $joinClause = "$entityName.id = $linkField";
2629                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(4);                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(metadata => 4);
2630                  $joinTable{"$entityName/$relationshipName"} = $joinClause;                  $joinTable{"$entityName/$relationshipName"} = $joinClause;
2631                  $joinTable{"$relationshipName/$entityName"} = $joinClause;                  $joinTable{"$relationshipName/$entityName"} = $joinClause;
2632                  # Create joins between this relationship and the other relationships.                  # Create joins between this relationship and the other relationships.
# Line 2253  Line 2647 
2647                              # relationship and itself are prohibited.                              # relationship and itself are prohibited.
2648                              my $relJoinClause = "$otherName.${otherType}_link = $linkField";                              my $relJoinClause = "$otherName.${otherType}_link = $linkField";
2649                              $joinTable{$joinKey} = $relJoinClause;                              $joinTable{$joinKey} = $relJoinClause;
2650                              Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(4);                              Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(metadata => 4);
2651                          }                          }
2652                      }                      }
2653                  }                  }
# Line 2262  Line 2656 
2656                  # relationship can only be ambiguous with another recursive relationship,                  # relationship can only be ambiguous with another recursive relationship,
2657                  # and the incoming relationship from the outer loop is never recursive.                  # and the incoming relationship from the outer loop is never recursive.
2658                  for my $otherName (@bothList) {                  for my $otherName (@bothList) {
2659                      Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(3);                      Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(metadata => 4);
2660                      # Join from the left.                      # Join from the left.
2661                      $joinTable{"$relationshipName/$otherName"} =                      $joinTable{"$relationshipName/$otherName"} =
2662                          "$linkField = $otherName.from_link";                          "$linkField = $otherName.from_link";
# Line 2277  Line 2671 
2671          # rise to situations where we can't create the path we want; however, it is always          # rise to situations where we can't create the path we want; however, it is always
2672          # possible to get the same effect using multiple queries.          # possible to get the same effect using multiple queries.
2673          for my $relationshipName (@bothList) {          for my $relationshipName (@bothList) {
2674              Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(3);              Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(metadata => 4);
2675              # Join to the entity from each direction.              # Join to the entity from each direction.
2676              $joinTable{"$entityName/$relationshipName"} =              $joinTable{"$entityName/$relationshipName"} =
2677                  "$entityName.id = $relationshipName.from_link";                  "$entityName.id = $relationshipName.from_link";

Legend:
Removed from v.1.16  
changed lines
  Added in v.1.41

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3