[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.26, Tue Oct 18 06:47:46 2005 UTC revision 1.44, Sat May 27 02:02:28 2006 UTC
# Line 9  Line 9 
9      use DBObject;      use DBObject;
10      use Stats;      use Stats;
11      use Time::HiRes qw(gettimeofday);      use Time::HiRes qw(gettimeofday);
12        use Digest::MD5 qw(md5_base64);
13      use FIG;      use FIG;
14    
15  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
# Line 109  Line 110 
110  compatability with certain database packages), but the only values supported are  compatability with certain database packages), but the only values supported are
111  0 and 1.  0 and 1.
112    
113    =item id-string
114    
115    variable-length string, maximum 25 characters
116    
117  =item key-string  =item key-string
118    
119  variable-length string, maximum 40 characters  variable-length string, maximum 40 characters
# Line 125  Line 130 
130    
131  variable-length string, maximum 255 characters  variable-length string, maximum 255 characters
132    
133    =item hash-string
134    
135    variable-length string, maximum 22 characters
136    
137  =back  =back
138    
139    The hash-string data type has a special meaning. The actual key passed into the loader will
140    be a string, but it will be digested into a 22-character MD5 code to save space. Although the
141    MD5 algorithm is not perfect, it is extremely unlikely two strings will have the same
142    digest. Therefore, it is presumed the keys will be unique. When the database is actually
143    in use, the hashed keys will be presented rather than the original values. For this reason,
144    they should not be used for entities where the key is meaningful.
145    
146  =head3 Global Tags  =head3 Global Tags
147    
148  The entire database definition must be inside a B<Database> tag. The display name of  The entire database definition must be inside a B<Database> tag. The display name of
# Line 309  Line 325 
325                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" },                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" },
326                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" },                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" },
327                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, dataGen => "FloatGen(0.0, 100.0)" },                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, dataGen => "FloatGen(0.0, 100.0)" },
328                    boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   2, dataGen => "IntGen(0, 1)" },                    boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   1, dataGen => "IntGen(0, 1)" },
329                     'hash-string' =>
330                                 { sqlType => 'VARCHAR(22)',        maxLen => 22,           avgLen =>  22, dataGen => "SringGen(22)" },
331                     'id-string' =>
332                                 { sqlType => 'VARCHAR(25)',        maxLen => 25,           avgLen =>  25, dataGen => "SringGen(22)" },
333                   'key-string' =>                   'key-string' =>
334                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, dataGen => "StringGen(IntGen(10,40))" },                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, dataGen => "StringGen(IntGen(10,40))" },
335                   'name-string' =>                   'name-string' =>
# Line 508  Line 528 
528          # Separate out the source, the target, and the join clause.          # Separate out the source, the target, and the join clause.
529          $joinKey =~ m!^([^/]+)/(.+)$!;          $joinKey =~ m!^([^/]+)/(.+)$!;
530          my ($sourceRelation, $targetRelation) = ($1, $2);          my ($sourceRelation, $targetRelation) = ($1, $2);
531          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(4);          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(Joins => 4);
532          my $source = $self->ComputeObjectSentence($sourceRelation);          my $source = $self->ComputeObjectSentence($sourceRelation);
533          my $target = $self->ComputeObjectSentence($targetRelation);          my $target = $self->ComputeObjectSentence($targetRelation);
534          my $clause = $joinTable->{$joinKey};          my $clause = $joinTable->{$joinKey};
# Line 632  Line 652 
652      }      }
653  }  }
654    
655    =head3 VerifyFields
656    
657    C<< my $count = $erdb->VerifyFields($relName, \@fieldList); >>
658    
659    Run through the list of proposed field values, insuring that all the character fields are
660    below the maximum length. If any fields are too long, they will be truncated in place.
661    
662    =over 4
663    
664    =item relName
665    
666    Name of the relation for which the specified fields are destined.
667    
668    =item fieldList
669    
670    Reference to a list, in order, of the fields to be put into the relation.
671    
672    =item RETURN
673    
674    Returns the number of fields truncated.
675    
676    =back
677    
678    =cut
679    
680    sub VerifyFields {
681        # Get the parameters.
682        my ($self, $relName, $fieldList) = @_;
683        # Initialize the return value.
684        my $retVal = 0;
685        # Get the relation definition.
686        my $relData = $self->_FindRelation($relName);
687        # Get the list of field descriptors.
688        my $fieldTypes = $relData->{Fields};
689        my $fieldCount = scalar @{$fieldTypes};
690        # Loop through the two lists.
691        for (my $i = 0; $i < $fieldCount; $i++) {
692            # Get the type of the current field.
693            my $fieldType = $fieldTypes->[$i]->{type};
694            # If it's a character field, verify the length.
695            if ($fieldType =~ /string/) {
696                my $maxLen = $TypeTable{$fieldType}->{maxLen};
697                my $oldString = $fieldList->[$i];
698                if (length($oldString) > $maxLen) {
699                    # Here it's too big, so we truncate it.
700                    Trace("Truncating field $i in relation $relName to $maxLen characters from \"$oldString\".") if T(1);
701                    $fieldList->[$i] = substr $oldString, 0, $maxLen;
702                    $retVal++;
703                }
704            }
705        }
706        # Return the truncation count.
707        return $retVal;
708    }
709    
710    =head3 DigestFields
711    
712    C<< $erdb->DigestFields($relName, $fieldList); >>
713    
714    Digest the strings in the field list that correspond to data type C<hash-string> in the
715    specified relation.
716    
717    =over 4
718    
719    =item relName
720    
721    Name of the relation to which the fields belong.
722    
723    =item fieldList
724    
725    List of field contents to be loaded into the relation.
726    
727    =back
728    
729    =cut
730    #: Return Type ;
731    sub DigestFields {
732        # Get the parameters.
733        my ($self, $relName, $fieldList) = @_;
734        # Get the relation definition.
735        my $relData = $self->_FindRelation($relName);
736        # Get the list of field descriptors.
737        my $fieldTypes = $relData->{Fields};
738        my $fieldCount = scalar @{$fieldTypes};
739        # Loop through the two lists.
740        for (my $i = 0; $i < $fieldCount; $i++) {
741            # Get the type of the current field.
742            my $fieldType = $fieldTypes->[$i]->{type};
743            # If it's a hash string, digest it in place.
744            if ($fieldType eq 'hash-string') {
745                $fieldList->[$i] = md5_base64($fieldList->[$i]);
746            }
747        }
748    }
749    
750  =head3 CreateIndex  =head3 CreateIndex
751    
752  C<< $erdb->CreateIndex($relationName); >>  C<< $erdb->CreateIndex($relationName); >>
# Line 822  Line 937 
937    
938  If multiple names are specified, then the query processor will automatically determine a  If multiple names are specified, then the query processor will automatically determine a
939  join path between the entities and relationships. The algorithm used is very simplistic.  join path between the entities and relationships. The algorithm used is very simplistic.
940  In particular, you can't specify any entity or relationship more than once, and if a  In particular, if a relationship is recursive, the path is determined by the order in which
941  relationship is recursive, the path is determined by the order in which the entity  the entity and the relationship appear. For example, consider a recursive relationship
942  and the relationship appear. For example, consider a recursive relationship B<IsParentOf>  B<IsParentOf> which relates B<People> objects to other B<People> objects. If the join path is
 which relates B<People> objects to other B<People> objects. If the join path is  
943  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,
944  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.
945    
946    If an entity or relationship is mentioned twice, the name for the second occurrence will
947    be suffixed with C<2>, the third occurrence will be suffixed with C<3>, and so forth. So,
948    for example, if we have C<['Feature', 'HasContig', 'Contig', 'HasContig']>, then the
949    B<to-link> field of the first B<HasContig> is specified as C<HasContig(to-link)>, while
950    the B<to-link> field of the second B<HasContig> is specified as C<HasContig2(to-link)>.
951    
952  =over 4  =over 4
953    
954  =item objectNames  =item objectNames
# Line 851  Line 971 
971    
972  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>
973    
974    Note that the case is important. Only an uppercase "ORDER BY" with a single space will
975    be processed. The idea is to make it less likely to find the verb by accident.
976    
977  The rules for field references in a sort order are the same as those for field references in the  The rules for field references in a sort order are the same as those for field references in the
978  filter clause in general; however, odd things may happen if a sort field is from a secondary  filter clause in general; however, odd things may happen if a sort field is from a secondary
979  relation.  relation.
980    
981    Finally, you can limit the number of rows returned by adding a LIMIT clause. The LIMIT must
982    be the last thing in the filter clause, and it contains only the word "LIMIT" followed by
983    a positive number. So, for example
984    
985    C<< "Genome(genus) = ? ORDER BY Genome(species) LIMIT 10" >>
986    
987    will only return the first ten genomes for the specified genus. The ORDER BY clause is not
988    required. For example, to just get the first 10 genomes in the B<Genome> table, you could
989    use
990    
991    C<< "LIMIT 10" >>
992    
993  =item param1, param2, ..., paramN  =item param1, param2, ..., paramN
994    
995  Parameter values to be substituted into the filter clause.  Parameter values to be substituted into the filter clause.
# Line 870  Line 1005 
1005  sub Get {  sub Get {
1006      # Get the parameters.      # Get the parameters.
1007      my ($self, $objectNames, $filterClause, @params) = @_;      my ($self, $objectNames, $filterClause, @params) = @_;
1008        # Adjust the list of object names to account for multiple occurrences of the
1009        # same object. We start with a hash table keyed on object name that will
1010        # return the object suffix. The first time an object is encountered it will
1011        # not be found in the hash. The next time the hash will map the object name
1012        # to 2, then 3, and so forth.
1013        my %objectHash = ();
1014        # This list will contain the object names as they are to appear in the
1015        # FROM list.
1016        my @fromList = ();
1017        # This list contains the suffixed object name for each object. It is exactly
1018        # parallel to the list in the $objectNames parameter.
1019        my @mappedNameList = ();
1020        # Finally, this hash translates from a mapped name to its original object name.
1021        my %mappedNameHash = ();
1022        # Now we create the lists. Note that for every single name we push something into
1023        # @fromList and @mappedNameList. This insures that those two arrays are exactly
1024        # parallel to $objectNames.
1025        for my $objectName (@{$objectNames}) {
1026            # Get the next suffix for this object.
1027            my $suffix = $objectHash{$objectName};
1028            if (! $suffix) {
1029                # Here we are seeing the object for the first time. The object name
1030                # is used as is.
1031                push @mappedNameList, $objectName;
1032                push @fromList, $objectName;
1033                $mappedNameHash{$objectName} = $objectName;
1034                # Denote the next suffix will be 2.
1035                $objectHash{$objectName} = 2;
1036            } else {
1037                # Here we've seen the object before. We construct a new name using
1038                # the suffix from the hash and update the hash.
1039                my $mappedName = "$objectName$suffix";
1040                $objectHash{$objectName} = $suffix + 1;
1041                # The FROM list has the object name followed by the mapped name. This
1042                # tells SQL it's still the same table, but we're using a different name
1043                # for it to avoid confusion.
1044                push @fromList, "$objectName $mappedName";
1045                # The mapped-name list contains the real mapped name.
1046                push @mappedNameList, $mappedName;
1047                # Finally, enable us to get back from the mapped name to the object name.
1048                $mappedNameHash{$mappedName} = $objectName;
1049            }
1050        }
1051      # Construct the SELECT statement. The general pattern is      # Construct the SELECT statement. The general pattern is
1052      #      #
1053      # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN      # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN
1054      #      #
1055      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
1056      my $command = "SELECT DISTINCT " . join('.*, ', @{$objectNames}) . ".* FROM " .      my $command = "SELECT DISTINCT " . join('.*, ', @mappedNameList) . ".* FROM " .
1057                  join(', ', @{$objectNames});                  join(', ', @fromList);
     Trace("SQL = $command") if T(SQL => 4);  
1058      # Check for a filter clause.      # Check for a filter clause.
1059      if ($filterClause) {      if ($filterClause) {
1060          # Here we have one, so we convert its field names and add it to the query. First,          # Here we have one, so we convert its field names and add it to the query. First,
# Line 885  Line 1062 
1062          my $filterString = $filterClause;          my $filterString = $filterClause;
1063          # Next, we sort the object names by length. This helps protect us from finding          # Next, we sort the object names by length. This helps protect us from finding
1064          # object names inside other object names when we're doing our search and replace.          # object names inside other object names when we're doing our search and replace.
1065          my @sortedNames = sort { length($b) - length($a) } @{$objectNames};          my @sortedNames = sort { length($b) - length($a) } @mappedNameList;
1066          # We will also keep a list of conditions to add to the WHERE clause in order to link          # We will also keep a list of conditions to add to the WHERE clause in order to link
1067          # entities and relationships as well as primary relations to secondary ones.          # entities and relationships as well as primary relations to secondary ones.
1068          my @joinWhere = ();          my @joinWhere = ();
1069          # The final preparatory step is to create a hash table of relation names. The          # The final preparatory step is to create a hash table of relation names. The
1070          # table begins with the relation names already in the SELECT command.          # table begins with the relation names already in the SELECT command. We may
1071          my %fromNames = ();          # need to add relations later if there is filtering on a field in a secondary
1072          for my $objectName (@sortedNames) {          # relation. The secondary relations are the ones that contain multiply-
1073              $fromNames{$objectName} = 1;          # occurring or optional fields.
1074          }          my %fromNames = map { $_ => 1 } @sortedNames;
1075          # We are ready to begin. We loop through the object names, replacing each          # We are ready to begin. We loop through the object names, replacing each
1076          # object name's field references by the corresponding SQL field reference.          # object name's field references by the corresponding SQL field reference.
1077          # Along the way, if we find a secondary relation, we will need to add it          # Along the way, if we find a secondary relation, we will need to add it
1078          # to the FROM clause.          # to the FROM clause.
1079          for my $objectName (@sortedNames) {          for my $mappedName (@sortedNames) {
1080              # Get the length of the object name plus 2. This is the value we add to the              # Get the length of the object name plus 2. This is the value we add to the
1081              # size of the field name to determine the size of the field reference as a              # size of the field name to determine the size of the field reference as a
1082              # whole.              # whole.
1083              my $nameLength = 2 + length $objectName;              my $nameLength = 2 + length $mappedName;
1084                # Get the real object name for this mapped name.
1085                my $objectName = $mappedNameHash{$mappedName};
1086                Trace("Processing $mappedName for object $objectName.") if T(4);
1087              # Get the object's field list.              # Get the object's field list.
1088              my $fieldList = $self->_GetFieldTable($objectName);              my $fieldList = $self->GetFieldTable($objectName);
1089              # Find the field references for this object.              # Find the field references for this object.
1090              while ($filterString =~ m/$objectName\(([^)]*)\)/g) {              while ($filterString =~ m/$mappedName\(([^)]*)\)/g) {
1091                  # At this point, $1 contains the field name, and the current position                  # At this point, $1 contains the field name, and the current position
1092                  # is set immediately after the final parenthesis. We pull out the name of                  # is set immediately after the final parenthesis. We pull out the name of
1093                  # the field and the position and length of the field reference as a whole.                  # the field and the position and length of the field reference as a whole.
# Line 918  Line 1098 
1098                  if (!exists $fieldList->{$fieldName}) {                  if (!exists $fieldList->{$fieldName}) {
1099                      Confess("Field $fieldName not found for object $objectName.");                      Confess("Field $fieldName not found for object $objectName.");
1100                  } else {                  } else {
1101                        Trace("Processing $fieldName at position $pos.") if T(4);
1102                      # Get the field's relation.                      # Get the field's relation.
1103                      my $relationName = $fieldList->{$fieldName}->{relation};                      my $relationName = $fieldList->{$fieldName}->{relation};
1104                        # Now we have a secondary relation. We need to insure it matches the
1105                        # mapped name of the primary relation. First we peel off the suffix
1106                        # from the mapped name.
1107                        my $mappingSuffix = substr $mappedName, length($objectName);
1108                        # Put the mapping suffix onto the relation name to get the
1109                        # mapped relation name.
1110                        my $mappedRelationName = "$relationName$mappingSuffix";
1111                      # Insure the relation is in the FROM clause.                      # Insure the relation is in the FROM clause.
1112                      if (!exists $fromNames{$relationName}) {                      if (!exists $fromNames{$mappedRelationName}) {
1113                          # Add the relation to the FROM clause.                          # Add the relation to the FROM clause.
1114                            if ($mappedRelationName eq $relationName) {
1115                                # The name is un-mapped, so we add it without
1116                                # any frills.
1117                          $command .= ", $relationName";                          $command .= ", $relationName";
                         # Create its join sub-clause.  
1118                          push @joinWhere, "$objectName.id = $relationName.id";                          push @joinWhere, "$objectName.id = $relationName.id";
1119                          # Denote we have it available for future fields.                          } else {
1120                          $fromNames{$relationName} = 1;                              # Here we have a mapping situation.
1121                                $command .= ", $relationName $mappedRelationName";
1122                                push @joinWhere, "$mappedRelationName.id = $mappedName.id";
1123                            }
1124                            # Denote we have this relation available for future fields.
1125                            $fromNames{$mappedRelationName} = 1;
1126                      }                      }
1127                      # Form an SQL field reference from the relation name and the field name.                      # Form an SQL field reference from the relation name and the field name.
1128                      my $sqlReference = "$relationName." . _FixName($fieldName);                      my $sqlReference = "$mappedRelationName." . _FixName($fieldName);
1129                      # Put it into the filter string in place of the old value.                      # Put it into the filter string in place of the old value.
1130                      substr($filterString, $pos, $len) = $sqlReference;                      substr($filterString, $pos, $len) = $sqlReference;
1131                      # Reposition the search.                      # Reposition the search.
# Line 942  Line 1137 
1137          # is more than one object in the object list. We start with the first object and          # is more than one object in the object list. We start with the first object and
1138          # run through the objects after it. Note also that we make a safety copy of the          # run through the objects after it. Note also that we make a safety copy of the
1139          # list before running through it.          # list before running through it.
1140          my @objectList = @{$objectNames};          my @mappedObjectList = @mappedNameList;
1141          my $lastObject = shift @objectList;          my $lastMappedObject = shift @mappedObjectList;
1142          # Get the join table.          # Get the join table.
1143          my $joinTable = $self->{_metaData}->{Joins};          my $joinTable = $self->{_metaData}->{Joins};
1144          # Loop through the object list.          # Loop through the object list.
1145          for my $thisObject (@objectList) {          for my $thisMappedObject (@mappedObjectList) {
1146              # Look for a join.              # Look for a join using the real object names.
1147                my $lastObject = $mappedNameHash{$lastMappedObject};
1148                my $thisObject = $mappedNameHash{$thisMappedObject};
1149              my $joinKey = "$lastObject/$thisObject";              my $joinKey = "$lastObject/$thisObject";
1150              if (!exists $joinTable->{$joinKey}) {              if (!exists $joinTable->{$joinKey}) {
1151                  # Here there's no join, so we throw an error.                  # Here there's no join, so we throw an error.
1152                  Confess("No join exists to connect from $lastObject to $thisObject.");                  Confess("No join exists to connect from $lastMappedObject to $thisMappedObject.");
1153              } else {              } else {
1154                  # Get the join clause and add it to the WHERE list.                  # Get the join clause.
1155                  push @joinWhere, $joinTable->{$joinKey};                  my $unMappedJoin = $joinTable->{$joinKey};
1156                    # Fix the names.
1157                    $unMappedJoin =~ s/$lastObject/$lastMappedObject/;
1158                    $unMappedJoin =~ s/$thisObject/$thisMappedObject/;
1159                    push @joinWhere, $unMappedJoin;
1160                  # Save this object as the last object for the next iteration.                  # Save this object as the last object for the next iteration.
1161                  $lastObject = $thisObject;                  $lastMappedObject = $thisMappedObject;
1162              }              }
1163          }          }
1164          # Now we need to handle the whole ORDER BY thing. We'll put the order by clause          # Now we need to handle the whole ORDER BY / LIMIT thing. The important part
1165          # in the following variable.          # here is we want the filter clause to be empty if there's no WHERE filter.
1166            # We'll put the ORDER BY / LIMIT clauses in the following variable.
1167          my $orderClause = "";          my $orderClause = "";
1168          # Locate the ORDER BY verb (if any).          # Locate the ORDER BY or LIMIT verbs (if any). We use a non-greedy
1169          if ($filterString =~ m/^(.*)ORDER BY/g) {          # operator so that we find the first occurrence of either verb.
1170              # Here we have an ORDER BY verb. Split it off of the filter string.          if ($filterString =~ m/^(.*?)\s*(ORDER BY|LIMIT)/g) {
1171                # Here we have an ORDER BY or LIMIT verb. Split it off of the filter string.
1172              my $pos = pos $filterString;              my $pos = pos $filterString;
1173              $orderClause = substr($filterString, $pos);              $orderClause = $2 . substr($filterString, $pos);
1174              $filterString = $1;              $filterString = $1;
1175          }          }
1176          # Add the filter and the join clauses (if any) to the SELECT command.          # Add the filter and the join clauses (if any) to the SELECT command.
1177          if ($filterString) {          if ($filterString) {
1178                Trace("Filter string is \"$filterString\".") if T(4);
1179              push @joinWhere, "($filterString)";              push @joinWhere, "($filterString)";
1180          }          }
1181          if (@joinWhere) {          if (@joinWhere) {
1182              $command .= " WHERE " . join(' AND ', @joinWhere);              $command .= " WHERE " . join(' AND ', @joinWhere);
1183          }          }
1184          # Add the sort clause (if any) to the SELECT command.          # Add the sort or limit clause (if any) to the SELECT command.
1185          if ($orderClause) {          if ($orderClause) {
1186              $command .= " ORDER BY $orderClause";              $command .= " $orderClause";
1187          }          }
1188      }      }
1189      Trace("SQL query: $command") if T(3);      Trace("SQL query: $command") if T(SQL => 3);
1190      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(4) && (@params > 0));      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(SQL => 4) && (@params > 0));
1191      my $sth = $dbh->prepare_command($command);      my $sth = $dbh->prepare_command($command);
1192      # Execute it with the parameters bound in.      # Execute it with the parameters bound in.
1193      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());
1194        # Now we create the relation map, which enables DBQuery to determine the order, name
1195        # and mapped name for each object in the query.
1196        my @relationMap = ();
1197        for my $mappedName (@mappedNameList) {
1198            push @relationMap, [$mappedName, $mappedNameHash{$mappedName}];
1199        }
1200      # Return the statement object.      # Return the statement object.
1201      my $retVal = DBQuery::_new($self, $sth, @{$objectNames});      my $retVal = DBQuery::_new($self, $sth, \@relationMap);
1202        return $retVal;
1203    }
1204    
1205    =head3 Delete
1206    
1207    C<< my $stats = $erdb->Delete($entityName, $objectID); >>
1208    
1209    Delete an entity instance from the database. The instance is deleted along with all entity and
1210    relationship instances dependent on it. The idea of dependence here is recursive. An object is
1211    always dependent on itself. An object is dependent if it is a 1-to-many or many-to-many
1212    relationship connected to a dependent entity or the "to" entity connected to a 1-to-many
1213    dependent relationship.
1214    
1215    =over 4
1216    
1217    =item entityName
1218    
1219    Name of the entity type for the instance being deleted.
1220    
1221    =item objectID
1222    
1223    ID of the entity instance to be deleted. If the ID contains a wild card character (C<%>),
1224    then it is presumed to by a LIKE pattern.
1225    
1226    =item testFlag
1227    
1228    If TRUE, the delete statements will be traced without being executed.
1229    
1230    =item RETURN
1231    
1232    Returns a statistics object indicating how many records of each particular table were
1233    deleted.
1234    
1235    =back
1236    
1237    =cut
1238    #: Return Type $%;
1239    sub Delete {
1240        # Get the parameters.
1241        my ($self, $entityName, $objectID, $testFlag) = @_;
1242        # Declare the return variable.
1243        my $retVal = Stats->new();
1244        # Get the DBKernel object.
1245        my $db = $self->{_dbh};
1246        # We're going to generate all the paths branching out from the starting entity. One of
1247        # the things we have to be careful about is preventing loops. We'll use a hash to
1248        # determine if we've hit a loop.
1249        my %alreadyFound = ();
1250        # These next lists will serve as our result stack. We start by pushing object lists onto
1251        # the stack, and then popping them off to do the deletes. This means the deletes will
1252        # start with the longer paths before getting to the shorter ones. That, in turn, makes
1253        # sure we don't delete records that might be needed to forge relationships back to the
1254        # original item. We have two lists-- one for TO-relationships, and one for
1255        # FROM-relationships and entities.
1256        my @fromPathList = ();
1257        my @toPathList = ();
1258        # This final hash is used to remember what work still needs to be done. We push paths
1259        # onto the list, then pop them off to extend the paths. We prime it with the starting
1260        # point. Note that we will work hard to insure that the last item on a path in the
1261        # TODO list is always an entity.
1262        my @todoList = ([$entityName]);
1263        while (@todoList) {
1264            # Get the current path.
1265            my $current = pop @todoList;
1266            # Copy it into a list.
1267            my @stackedPath = @{$current};
1268            # Pull off the last item on the path. It will always be an entity.
1269            my $entityName = pop @stackedPath;
1270            # Add it to the alreadyFound list.
1271            $alreadyFound{$entityName} = 1;
1272            # Get the entity data.
1273            my $entityData = $self->_GetStructure($entityName);
1274            # The first task is to loop through the entity's relation. A DELETE command will
1275            # be needed for each of them.
1276            my $relations = $entityData->{Relations};
1277            for my $relation (keys %{$relations}) {
1278                my @augmentedList = (@stackedPath, $relation);
1279                push @fromPathList, \@augmentedList;
1280            }
1281            # Now we need to look for relationships connected to this entity.
1282            my $relationshipList = $self->{_metaData}->{Relationships};
1283            for my $relationshipName (keys %{$relationshipList}) {
1284                my $relationship = $relationshipList->{$relationshipName};
1285                # Check the FROM field. We're only interested if it's us.
1286                if ($relationship->{from} eq $entityName) {
1287                    # Add the path to this relationship.
1288                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1289                    push @fromPathList, \@augmentedList;
1290                    # Check the arity. If it's MM we're done. If it's 1M
1291                    # and the target hasn't been seen yet, we want to
1292                    # stack the entity for future processing.
1293                    if ($relationship->{arity} eq '1M') {
1294                        my $toEntity = $relationship->{to};
1295                        if (! exists $alreadyFound{$toEntity}) {
1296                            # Here we have a new entity that's dependent on
1297                            # the current entity, so we need to stack it.
1298                            my @stackList = (@augmentedList, $toEntity);
1299                            push @fromPathList, \@stackList;
1300                        } else {
1301                            Trace("$toEntity ignored because it occurred previously.") if T(4);
1302                        }
1303                    }
1304                }
1305                # Now check the TO field. In this case only the relationship needs
1306                # deletion.
1307                if ($relationship->{to} eq $entityName) {
1308                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1309                    push @toPathList, \@augmentedList;
1310                }
1311            }
1312        }
1313        # Create the first qualifier for the WHERE clause. This selects the
1314        # keys of the primary entity records to be deleted. When we're deleting
1315        # from a dependent table, we construct a join page from the first qualifier
1316        # to the table containing the dependent records to delete.
1317        my $qualifier = ($objectID =~ /%/ ? "LIKE ?" : "= ?");
1318        # We need to make two passes. The first is through the to-list, and
1319        # the second through the from-list. The from-list is second because
1320        # the to-list may need to pass through some of the entities the
1321        # from-list would delete.
1322        my %stackList = ( from_link => \@fromPathList, to_link => \@toPathList );
1323        # Now it's time to do the deletes. We do it in two passes.
1324        for my $keyName ('to_link', 'from_link') {
1325            # Get the list for this key.
1326            my @pathList = @{$stackList{$keyName}};
1327            Trace(scalar(@pathList) . " entries in path list for $keyName.") if T(3);
1328            # Loop through this list.
1329            while (my $path = pop @pathList) {
1330                # Get the table whose rows are to be deleted.
1331                my @pathTables = @{$path};
1332                # Start the DELETE statement. We need to call DBKernel because the
1333                # syntax of a DELETE-USING varies among DBMSs.
1334                my $target = $pathTables[$#pathTables];
1335                my $stmt = $db->SetUsing(@pathTables);
1336                # Now start the WHERE. The first thing is the ID field from the starting table. That
1337                # starting table will either be the entity relation or one of the entity's
1338                # sub-relations.
1339                $stmt .= " WHERE $pathTables[0].id $qualifier";
1340                # Now we run through the remaining entities in the path, connecting them up.
1341                for (my $i = 1; $i <= $#pathTables; $i += 2) {
1342                    # Connect the current relationship to the preceding entity.
1343                    my ($entity, $rel) = @pathTables[$i-1,$i];
1344                    # The style of connection depends on the direction of the relationship.
1345                    $stmt .= " AND $entity.id = $rel.$keyName";
1346                    if ($i + 1 <= $#pathTables) {
1347                        # Here there's a next entity, so connect that to the relationship's
1348                        # to-link.
1349                        my $entity2 = $pathTables[$i+1];
1350                        $stmt .= " AND $rel.to_link = $entity2.id";
1351                    }
1352                }
1353                # Now we have our desired DELETE statement.
1354                if ($testFlag) {
1355                    # Here the user wants to trace without executing.
1356                    Trace($stmt) if T(0);
1357                } else {
1358                    # Here we can delete. Note that the SQL method dies with a confessing
1359                    # if an error occurs, so we just go ahead and do it.
1360                    Trace("Executing delete from $target using '$objectID'.") if T(3);
1361                    my $rv = $db->SQL($stmt, 0, $objectID);
1362                    # Accumulate the statistics for this delete. The only rows deleted
1363                    # are from the target table, so we use its name to record the
1364                    # statistic.
1365                    $retVal->Add($target, $rv);
1366                }
1367            }
1368        }
1369        # Return the result.
1370      return $retVal;      return $retVal;
1371  }  }
1372    
# Line 1294  Line 1672 
1672    
1673  =item RETURN  =item RETURN
1674    
1675  Returns a statistical object containing the number of records read and a list of  Returns a statistical object containing a list of the error messages.
 the error messages.  
1676    
1677  =back  =back
1678    
# Line 1568  Line 1945 
1945      } else {      } else {
1946          push @parmList, $parameterList;          push @parmList, $parameterList;
1947      }      }
     # Create the query.  
     my $query = $self->Get($objectNames, $filterClause, @parmList);  
     # Set up a counter of the number of records read.  
     my $fetched = 0;  
1948      # Insure the counter has a value.      # Insure the counter has a value.
1949      if (!defined $count) {      if (!defined $count) {
1950          $count = 0;          $count = 0;
1951      }      }
1952        # Add the row limit to the filter clause.
1953        if ($count > 0) {
1954            $filterClause .= " LIMIT $count";
1955        }
1956        # Create the query.
1957        my $query = $self->Get($objectNames, $filterClause, @parmList);
1958        # Set up a counter of the number of records read.
1959        my $fetched = 0;
1960      # Loop through the records returned, extracting the fields. Note that if the      # Loop through the records returned, extracting the fields. Note that if the
1961      # counter is non-zero, we stop when the number of records read hits the count.      # counter is non-zero, we stop when the number of records read hits the count.
1962      my @retVal = ();      my @retVal = ();
# Line 1626  Line 2007 
2007      return $retVal;      return $retVal;
2008  }  }
2009    
2010    =head3 GetFieldTable
2011    
2012    C<< my $fieldHash = $self->GetFieldTable($objectnName); >>
2013    
2014    Get the field structure for a specified entity or relationship.
2015    
2016    =over 4
2017    
2018    =item objectName
2019    
2020    Name of the desired entity or relationship.
2021    
2022    =item RETURN
2023    
2024    The table containing the field descriptors for the specified object.
2025    
2026    =back
2027    
2028    =cut
2029    
2030    sub GetFieldTable {
2031        # Get the parameters.
2032        my ($self, $objectName) = @_;
2033        # Get the descriptor from the metadata.
2034        my $objectData = $self->_GetStructure($objectName);
2035        # Return the object's field table.
2036        return $objectData->{Fields};
2037    }
2038    
2039    =head3 GetUsefulCrossValues
2040    
2041    C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >>
2042    
2043    Return a list of the useful attributes that would be returned by a B<Cross> call
2044    from an entity of the source entity type through the specified relationship. This
2045    means it will return the fields of the target entity type and the intersection data
2046    fields in the relationship. Only primary table fields are returned. In other words,
2047    the field names returned will be for fields where there is always one and only one
2048    value.
2049    
2050    =over 4
2051    
2052    =item sourceEntity
2053    
2054    Name of the entity from which the relationship crossing will start.
2055    
2056    =item relationship
2057    
2058    Name of the relationship being crossed.
2059    
2060    =item RETURN
2061    
2062    Returns a list of field names in Sprout field format (I<objectName>C<(>I<fieldName>C<)>.
2063    
2064    =back
2065    
2066    =cut
2067    #: Return Type @;
2068    sub GetUsefulCrossValues {
2069        # Get the parameters.
2070        my ($self, $sourceEntity, $relationship) = @_;
2071        # Declare the return variable.
2072        my @retVal = ();
2073        # Determine the target entity for the relationship. This is whichever entity is not
2074        # the source entity. So, if the source entity is the FROM, we'll get the name of
2075        # the TO, and vice versa.
2076        my $relStructure = $self->_GetStructure($relationship);
2077        my $targetEntityType = ($relStructure->{from} eq $sourceEntity ? "to" : "from");
2078        my $targetEntity = $relStructure->{$targetEntityType};
2079        # Get the field table for the entity.
2080        my $entityFields = $self->GetFieldTable($targetEntity);
2081        # The field table is a hash. The hash key is the field name. The hash value is a structure.
2082        # For the entity fields, the key aspect of the target structure is that the {relation} value
2083        # must match the entity name.
2084        my @fieldList = map { "$targetEntity($_)" } grep { $entityFields->{$_}->{relation} eq $targetEntity }
2085                            keys %{$entityFields};
2086        # Push the fields found onto the return variable.
2087        push @retVal, sort @fieldList;
2088        # Get the field table for the relationship.
2089        my $relationshipFields = $self->GetFieldTable($relationship);
2090        # Here we have a different rule. We want all the fields other than "from-link" and "to-link".
2091        # This may end up being an empty set.
2092        my @fieldList2 = map { "$relationship($_)" } grep { $_ ne "from-link" && $_ ne "to-link" }
2093                            keys %{$relationshipFields};
2094        # Push these onto the return list.
2095        push @retVal, sort @fieldList2;
2096        # Return the result.
2097        return @retVal;
2098    }
2099    
2100  =head2 Internal Utility Methods  =head2 Internal Utility Methods
2101    
2102  =head3 GetLoadStats  =head3 GetLoadStats
# Line 1637  Line 2108 
2108  =cut  =cut
2109    
2110  sub _GetLoadStats {  sub _GetLoadStats {
2111      return Stats->new('records');      return Stats->new();
2112  }  }
2113    
2114  =head3 GenerateFields  =head3 GenerateFields
# Line 1832  Line 2303 
2303      return $objectData->{Relations};      return $objectData->{Relations};
2304  }  }
2305    
 =head3 GetFieldTable  
   
 Get the field structure for a specified entity or relationship.  
   
 This is an instance method.  
   
 =over 4  
   
 =item objectName  
   
 Name of the desired entity or relationship.  
   
 =item RETURN  
   
 The table containing the field descriptors for the specified object.  
   
 =back  
   
 =cut  
   
 sub _GetFieldTable {  
     # Get the parameters.  
     my ($self, $objectName) = @_;  
     # Get the descriptor from the metadata.  
     my $objectData = $self->_GetStructure($objectName);  
     # Return the object's field table.  
     return $objectData->{Fields};  
 }  
   
2306  =head3 ValidateFieldNames  =head3 ValidateFieldNames
2307    
2308  Determine whether or not the field names are valid. A description of the problems with the names  Determine whether or not the field names are valid. A description of the problems with the names
# Line 2214  Line 2656 
2656              # Determine if this relationship has our entity in one of its link fields.              # Determine if this relationship has our entity in one of its link fields.
2657              my $fromEntity = $relationship->{from};              my $fromEntity = $relationship->{from};
2658              my $toEntity = $relationship->{to};              my $toEntity = $relationship->{to};
2659              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(4);              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(Joins => 4);
2660              if ($fromEntity eq $entityName) {              if ($fromEntity eq $entityName) {
2661                  if ($toEntity eq $entityName) {                  if ($toEntity eq $entityName) {
2662                      # Here the relationship is recursive.                      # Here the relationship is recursive.
# Line 2303  Line 2745 
2745      return $metadata;      return $metadata;
2746  }  }
2747    
2748    =head3 SortNeeded
2749    
2750    C<< my $flag = $erdb->SortNeeded($relationName); >>
2751    
2752    Return TRUE if the specified relation should be sorted during loading to remove duplicate keys,
2753    else FALSE.
2754    
2755    =over 4
2756    
2757    =item relationName
2758    
2759    Name of the relation to be examined.
2760    
2761    =item RETURN
2762    
2763    Returns TRUE if the relation needs a sort, else FALSE.
2764    
2765    =back
2766    
2767    =cut
2768    #: Return Type $;
2769    sub SortNeeded {
2770        # Get the parameters.
2771        my ($self, $relationName) = @_;
2772        # Declare the return variable.
2773        my $retVal = 0;
2774        # Find out if the relation is a primary entity relation.
2775        my $entityTable = $self->{_metaData}->{Entities};
2776        if (exists $entityTable->{$relationName}) {
2777            my $keyType = $entityTable->{$relationName}->{keyType};
2778            Trace("Relation $relationName found in entity table with key type $keyType.") if T(3);
2779            # If the key is not a hash string, we must do the sort.
2780            if ($keyType ne 'hash-string') {
2781                $retVal = 1;
2782            }
2783        }
2784        # Return the result.
2785        return $retVal;
2786    }
2787    
2788  =head3 CreateRelationshipIndex  =head3 CreateRelationshipIndex
2789    
2790  Create an index for a relationship's relation.  Create an index for a relationship's relation.

Legend:
Removed from v.1.26  
changed lines
  Added in v.1.44

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3