[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.33, Sat Jan 28 09:36:47 2006 UTC revision 1.40, Wed Mar 29 20:03:05 2006 UTC
# Line 877  Line 877 
877    
878  If multiple names are specified, then the query processor will automatically determine a  If multiple names are specified, then the query processor will automatically determine a
879  join path between the entities and relationships. The algorithm used is very simplistic.  join path between the entities and relationships. The algorithm used is very simplistic.
880  In particular, you can't specify any entity or relationship more than once, and if a  In particular, if a relationship is recursive, the path is determined by the order in which
881  relationship is recursive, the path is determined by the order in which the entity  the entity and the relationship appear. For example, consider a recursive relationship
882  and the relationship appear. For example, consider a recursive relationship B<IsParentOf>  B<IsParentOf> which relates B<People> objects to other B<People> objects. If the join path is
 which relates B<People> objects to other B<People> objects. If the join path is  
883  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,
884  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.
885    
886    If an entity or relationship is mentioned twice, the name for the second occurrence will
887    be suffixed with C<2>, the third occurrence will be suffixed with C<3>, and so forth. So,
888    for example, if we have C<['Feature', 'HasContig', 'Contig', 'HasContig']>, then the
889    B<to-link> field of the first B<HasContig> is specified as C<HasContig(to-link)>, while
890    the B<to-link> field of the second B<HasContig> is specified as C<HasContig2(to-link)>.
891    
892  =over 4  =over 4
893    
894  =item objectNames  =item objectNames
# Line 913  Line 918 
918  filter clause in general; however, odd things may happen if a sort field is from a secondary  filter clause in general; however, odd things may happen if a sort field is from a secondary
919  relation.  relation.
920    
921    Finally, you can limit the number of rows returned by adding a LIMIT clause. The LIMIT must
922    be the last thing in the filter clause, and it contains only the word "LIMIT" followed by
923    a positive number. So, for example
924    
925    C<< "Genome(genus) = ? ORDER BY Genome(species) LIMIT 10" >>
926    
927    will only return the first ten genomes for the specified genus. The ORDER BY clause is not
928    required. For example, to just get the first 10 genomes in the B<Genome> table, you could
929    use
930    
931    C<< "LIMIT 10" >>
932    
933  =item param1, param2, ..., paramN  =item param1, param2, ..., paramN
934    
935  Parameter values to be substituted into the filter clause.  Parameter values to be substituted into the filter clause.
# Line 928  Line 945 
945  sub Get {  sub Get {
946      # Get the parameters.      # Get the parameters.
947      my ($self, $objectNames, $filterClause, @params) = @_;      my ($self, $objectNames, $filterClause, @params) = @_;
948        # Adjust the list of object names to account for multiple occurrences of the
949        # same object. We start with a hash table keyed on object name that will
950        # return the object suffix. The first time an object is encountered it will
951        # not be found in the hash. The next time the hash will map the object name
952        # to 2, then 3, and so forth.
953        my %objectHash = ();
954        # This list will contain the object names as they are to appear in the
955        # FROM list.
956        my @fromList = ();
957        # This list contains the suffixed object name for each object. It is exactly
958        # parallel to the list in the $objectNames parameter.
959        my @mappedNameList = ();
960        # Finally, this hash translates from a mapped name to its original object name.
961        my %mappedNameHash = ();
962        # Now we create the lists. Note that for every single name we push something into
963        # @fromList and @mappedNameList. This insures that those two arrays are exactly
964        # parallel to $objectNames.
965        for my $objectName (@{$objectNames}) {
966            # Get the next suffix for this object.
967            my $suffix = $objectHash{$objectName};
968            if (! $suffix) {
969                # Here we are seeing the object for the first time. The object name
970                # is used as is.
971                push @mappedNameList, $objectName;
972                push @fromList, $objectName;
973                $mappedNameHash{$objectName} = $objectName;
974                # Denote the next suffix will be 2.
975                $objectHash{$objectName} = 2;
976            } else {
977                # Here we've seen the object before. We construct a new name using
978                # the suffix from the hash and update the hash.
979                my $mappedName = "$objectName$suffix";
980                $objectHash{$objectName} = $suffix + 1;
981                # The FROM list has the object name followed by the mapped name. This
982                # tells SQL it's still the same table, but we're using a different name
983                # for it to avoid confusion.
984                push @fromList, "$objectName $mappedName";
985                # The mapped-name list contains the real mapped name.
986                push @mappedNameList, $mappedName;
987                # Finally, enable us to get back from the mapped name to the object name.
988                $mappedNameHash{$mappedName} = $objectName;
989            }
990        }
991      # Construct the SELECT statement. The general pattern is      # Construct the SELECT statement. The general pattern is
992      #      #
993      # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN      # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN
994      #      #
995      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
996      my $command = "SELECT DISTINCT " . join('.*, ', @{$objectNames}) . ".* FROM " .      my $command = "SELECT DISTINCT " . join('.*, ', @mappedNameList) . ".* FROM " .
997                  join(', ', @{$objectNames});                  join(', ', @fromList);
998      # Check for a filter clause.      # Check for a filter clause.
999      if ($filterClause) {      if ($filterClause) {
1000          # Here we have one, so we convert its field names and add it to the query. First,          # Here we have one, so we convert its field names and add it to the query. First,
# Line 942  Line 1002 
1002          my $filterString = $filterClause;          my $filterString = $filterClause;
1003          # Next, we sort the object names by length. This helps protect us from finding          # Next, we sort the object names by length. This helps protect us from finding
1004          # object names inside other object names when we're doing our search and replace.          # object names inside other object names when we're doing our search and replace.
1005          my @sortedNames = sort { length($b) - length($a) } @{$objectNames};          my @sortedNames = sort { length($b) - length($a) } @mappedNameList;
1006          # We will also keep a list of conditions to add to the WHERE clause in order to link          # We will also keep a list of conditions to add to the WHERE clause in order to link
1007          # entities and relationships as well as primary relations to secondary ones.          # entities and relationships as well as primary relations to secondary ones.
1008          my @joinWhere = ();          my @joinWhere = ();
1009          # The final preparatory step is to create a hash table of relation names. The          # The final preparatory step is to create a hash table of relation names. The
1010          # table begins with the relation names already in the SELECT command.          # table begins with the relation names already in the SELECT command. We may
1011          my %fromNames = ();          # need to add relations later if there is filtering on a field in a secondary
1012          for my $objectName (@sortedNames) {          # relation. The secondary relations are the ones that contain multiply-
1013              $fromNames{$objectName} = 1;          # occurring or optional fields.
1014          }          my %fromNames = map { $_ => 1 } @sortedNames;
1015          # We are ready to begin. We loop through the object names, replacing each          # We are ready to begin. We loop through the object names, replacing each
1016          # object name's field references by the corresponding SQL field reference.          # object name's field references by the corresponding SQL field reference.
1017          # Along the way, if we find a secondary relation, we will need to add it          # Along the way, if we find a secondary relation, we will need to add it
1018          # to the FROM clause.          # to the FROM clause.
1019          for my $objectName (@sortedNames) {          for my $mappedName (@sortedNames) {
1020              # Get the length of the object name plus 2. This is the value we add to the              # Get the length of the object name plus 2. This is the value we add to the
1021              # size of the field name to determine the size of the field reference as a              # size of the field name to determine the size of the field reference as a
1022              # whole.              # whole.
1023              my $nameLength = 2 + length $objectName;              my $nameLength = 2 + length $mappedName;
1024                # Get the real object name for this mapped name.
1025                my $objectName = $mappedNameHash{$mappedName};
1026                Trace("Processing $mappedName for object $objectName.") if T(4);
1027              # Get the object's field list.              # Get the object's field list.
1028              my $fieldList = $self->_GetFieldTable($objectName);              my $fieldList = $self->GetFieldTable($objectName);
1029              # Find the field references for this object.              # Find the field references for this object.
1030              while ($filterString =~ m/$objectName\(([^)]*)\)/g) {              while ($filterString =~ m/$mappedName\(([^)]*)\)/g) {
1031                  # At this point, $1 contains the field name, and the current position                  # At this point, $1 contains the field name, and the current position
1032                  # is set immediately after the final parenthesis. We pull out the name of                  # is set immediately after the final parenthesis. We pull out the name of
1033                  # the field and the position and length of the field reference as a whole.                  # the field and the position and length of the field reference as a whole.
# Line 975  Line 1038 
1038                  if (!exists $fieldList->{$fieldName}) {                  if (!exists $fieldList->{$fieldName}) {
1039                      Confess("Field $fieldName not found for object $objectName.");                      Confess("Field $fieldName not found for object $objectName.");
1040                  } else {                  } else {
1041                        Trace("Processing $fieldName at position $pos.") if T(4);
1042                      # Get the field's relation.                      # Get the field's relation.
1043                      my $relationName = $fieldList->{$fieldName}->{relation};                      my $relationName = $fieldList->{$fieldName}->{relation};
1044                        # Now we have a secondary relation. We need to insure it matches the
1045                        # mapped name of the primary relation. First we peel off the suffix
1046                        # from the mapped name.
1047                        my $mappingSuffix = substr $mappedName, length($objectName);
1048                        # Put the mapping suffix onto the relation name to get the
1049                        # mapped relation name.
1050                        my $mappedRelationName = "$relationName$mappingSuffix";
1051                      # Insure the relation is in the FROM clause.                      # Insure the relation is in the FROM clause.
1052                      if (!exists $fromNames{$relationName}) {                      if (!exists $fromNames{$mappedRelationName}) {
1053                          # Add the relation to the FROM clause.                          # Add the relation to the FROM clause.
1054                            if ($mappedRelationName eq $relationName) {
1055                                # The name is un-mapped, so we add it without
1056                                # any frills.
1057                          $command .= ", $relationName";                          $command .= ", $relationName";
                         # Create its join sub-clause.  
1058                          push @joinWhere, "$objectName.id = $relationName.id";                          push @joinWhere, "$objectName.id = $relationName.id";
1059                          # Denote we have it available for future fields.                          } else {
1060                          $fromNames{$relationName} = 1;                              # Here we have a mapping situation.
1061                                $command .= ", $relationName $mappedRelationName";
1062                                push @joinWhere, "$mappedRelationName.id = $mappedName.id";
1063                            }
1064                            # Denote we have this relation available for future fields.
1065                            $fromNames{$mappedRelationName} = 1;
1066                      }                      }
1067                      # Form an SQL field reference from the relation name and the field name.                      # Form an SQL field reference from the relation name and the field name.
1068                      my $sqlReference = "$relationName." . _FixName($fieldName);                      my $sqlReference = "$mappedRelationName." . _FixName($fieldName);
1069                      # Put it into the filter string in place of the old value.                      # Put it into the filter string in place of the old value.
1070                      substr($filterString, $pos, $len) = $sqlReference;                      substr($filterString, $pos, $len) = $sqlReference;
1071                      # Reposition the search.                      # Reposition the search.
1072                      pos $filterString = $pos + length $sqlReference;                      pos $filterString = $pos + length $sqlReference;
1073                        Trace("New filter string is \"$filterString\".") if T(4);
1074                  }                  }
1075              }              }
1076          }          }
# Line 999  Line 1078 
1078          # is more than one object in the object list. We start with the first object and          # is more than one object in the object list. We start with the first object and
1079          # run through the objects after it. Note also that we make a safety copy of the          # run through the objects after it. Note also that we make a safety copy of the
1080          # list before running through it.          # list before running through it.
1081          my @objectList = @{$objectNames};          my @mappedObjectList = @mappedNameList;
1082          my $lastObject = shift @objectList;          my $lastMappedObject = shift @mappedObjectList;
1083          # Get the join table.          # Get the join table.
1084          my $joinTable = $self->{_metaData}->{Joins};          my $joinTable = $self->{_metaData}->{Joins};
1085          # Loop through the object list.          # Loop through the object list.
1086          for my $thisObject (@objectList) {          for my $thisMappedObject (@mappedObjectList) {
1087              # Look for a join.              # Look for a join using the real object names.
1088                my $lastObject = $mappedNameHash{$lastMappedObject};
1089                my $thisObject = $mappedNameHash{$thisMappedObject};
1090              my $joinKey = "$lastObject/$thisObject";              my $joinKey = "$lastObject/$thisObject";
1091              if (!exists $joinTable->{$joinKey}) {              if (!exists $joinTable->{$joinKey}) {
1092                  # Here there's no join, so we throw an error.                  # Here there's no join, so we throw an error.
1093                  Confess("No join exists to connect from $lastObject to $thisObject.");                  Confess("No join exists to connect from $lastMappedObject to $thisMappedObject.");
1094              } else {              } else {
1095                  # Get the join clause and add it to the WHERE list.                  # Get the join clause.
1096                  push @joinWhere, $joinTable->{$joinKey};                  my $unMappedJoin = $joinTable->{$joinKey};
1097                    # Fix the names.
1098                    $unMappedJoin =~ s/$lastObject/$lastMappedObject/;
1099                    $unMappedJoin =~ s/$thisObject/$thisMappedObject/;
1100                    push @joinWhere, $unMappedJoin;
1101                  # Save this object as the last object for the next iteration.                  # Save this object as the last object for the next iteration.
1102                  $lastObject = $thisObject;                  $lastMappedObject = $thisMappedObject;
1103              }              }
1104          }          }
1105          # Now we need to handle the whole ORDER BY / LIMIT thing. The important part          # Now we need to handle the whole ORDER BY / LIMIT thing. The important part
# Line 1046  Line 1131 
1131      my $sth = $dbh->prepare_command($command);      my $sth = $dbh->prepare_command($command);
1132      # Execute it with the parameters bound in.      # Execute it with the parameters bound in.
1133      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());
1134        # Now we create the relation map, which enables DBQuery to determine the order, name
1135        # and mapped name for each object in the query.
1136        my @relationMap = ();
1137        for my $mappedName (@mappedNameList) {
1138            push @relationMap, [$mappedName, $mappedNameHash{$mappedName}];
1139        }
1140      # Return the statement object.      # Return the statement object.
1141      my $retVal = DBQuery::_new($self, $sth, @{$objectNames});      my $retVal = DBQuery::_new($self, $sth, \@relationMap);
1142      return $retVal;      return $retVal;
1143  }  }
1144    
# Line 1146  Line 1237 
1237                          # the current entity, so we need to stack it.                          # the current entity, so we need to stack it.
1238                          my @stackList = (@augmentedList, $toEntity);                          my @stackList = (@augmentedList, $toEntity);
1239                          push @fromPathList, \@stackList;                          push @fromPathList, \@stackList;
1240                        } else {
1241                            Trace("$toEntity ignored because it occurred previously.") if T(4);
1242                      }                      }
1243                  }                  }
1244              }              }
# Line 1171  Line 1264 
1264      for my $keyName ('to_link', 'from_link') {      for my $keyName ('to_link', 'from_link') {
1265          # Get the list for this key.          # Get the list for this key.
1266          my @pathList = @{$stackList{$keyName}};          my @pathList = @{$stackList{$keyName}};
1267            Trace(scalar(@pathList) . " entries in path list for $keyName.") if T(3);
1268          # Loop through this list.          # Loop through this list.
1269          while (my $path = pop @pathList) {          while (my $path = pop @pathList) {
1270              # Get the table whose rows are to be deleted.              # Get the table whose rows are to be deleted.
1271              my @pathTables = @{$path};              my @pathTables = @{$path};
1272              # Start the DELETE statement.              # Start the DELETE statement. We need to call DBKernel because the
1273                # syntax of a DELETE-USING varies among DBMSs.
1274              my $target = $pathTables[$#pathTables];              my $target = $pathTables[$#pathTables];
1275              my $stmt = "DELETE FROM $target";              my $stmt = $db->SetUsing(@pathTables);
             # If there's more than just the one table, we need a USING clause.  
             if (@pathTables > 1) {  
                 $stmt .= " USING " . join(", ", @pathTables[0 .. ($#pathTables - 1)]);  
             }  
1276              # Now start the WHERE. The first thing is the ID field from the starting table. That              # Now start the WHERE. The first thing is the ID field from the starting table. That
1277              # starting table will either be the entity relation or one of the entity's              # starting table will either be the entity relation or one of the entity's
1278              # sub-relations.              # sub-relations.
# Line 1191  Line 1282 
1282                  # Connect the current relationship to the preceding entity.                  # Connect the current relationship to the preceding entity.
1283                  my ($entity, $rel) = @pathTables[$i-1,$i];                  my ($entity, $rel) = @pathTables[$i-1,$i];
1284                  # The style of connection depends on the direction of the relationship.                  # The style of connection depends on the direction of the relationship.
1285                  $stmt .= " AND $entity.id = $rel.from_link";                  $stmt .= " AND $entity.id = $rel.$keyName";
1286                  if ($i + 1 <= $#pathTables) {                  if ($i + 1 <= $#pathTables) {
1287                      # Here there's a next entity, so connect that to the relationship's                      # Here there's a next entity, so connect that to the relationship's
1288                      # to-link.                      # to-link.
1289                      my $entity2 = $pathTables[$i+1];                      my $entity2 = $pathTables[$i+1];
1290                      $stmt .= " AND $rel.$keyName = $entity2.id";                      $stmt .= " AND $rel.to_link = $entity2.id";
1291                  }                  }
1292              }              }
1293              # Now we have our desired DELETE statement.              # Now we have our desired DELETE statement.
# Line 1206  Line 1297 
1297              } else {              } else {
1298                  # Here we can delete. Note that the SQL method dies with a confessing                  # Here we can delete. Note that the SQL method dies with a confessing
1299                  # if an error occurs, so we just go ahead and do it.                  # if an error occurs, so we just go ahead and do it.
1300                  Trace("Executing delete: $stmt") if T(3);                  Trace("Executing delete from $target using '$objectID'.") if T(3);
1301                  my $rv = $db->SQL($stmt, 0, [$objectID]);                  my $rv = $db->SQL($stmt, 0, $objectID);
1302                  # Accumulate the statistics for this delete. The only rows deleted                  # Accumulate the statistics for this delete. The only rows deleted
1303                  # are from the target table, so we use its name to record the                  # are from the target table, so we use its name to record the
1304                  # statistic.                  # statistic.
# Line 1856  Line 1947 
1947      return $retVal;      return $retVal;
1948  }  }
1949    
1950    =head3 GetFieldTable
1951    
1952    C<< my $fieldHash = $self->GetFieldTable($objectnName); >>
1953    
1954    Get the field structure for a specified entity or relationship.
1955    
1956    =over 4
1957    
1958    =item objectName
1959    
1960    Name of the desired entity or relationship.
1961    
1962    =item RETURN
1963    
1964    The table containing the field descriptors for the specified object.
1965    
1966    =back
1967    
1968    =cut
1969    
1970    sub GetFieldTable {
1971        # Get the parameters.
1972        my ($self, $objectName) = @_;
1973        # Get the descriptor from the metadata.
1974        my $objectData = $self->_GetStructure($objectName);
1975        # Return the object's field table.
1976        return $objectData->{Fields};
1977    }
1978    
1979    =head3 GetUsefulCrossValues
1980    
1981    C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >>
1982    
1983    Return a list of the useful attributes that would be returned by a B<Cross> call
1984    from an entity of the source entity type through the specified relationship. This
1985    means it will return the fields of the target entity type and the intersection data
1986    fields in the relationship. Only primary table fields are returned. In other words,
1987    the field names returned will be for fields where there is always one and only one
1988    value.
1989    
1990    =over 4
1991    
1992    =item sourceEntity
1993    
1994    Name of the entity from which the relationship crossing will start.
1995    
1996    =item relationship
1997    
1998    Name of the relationship being crossed.
1999    
2000    =item RETURN
2001    
2002    Returns a list of field names in Sprout field format (I<objectName>C<(>I<fieldName>C<)>.
2003    
2004    =back
2005    
2006    =cut
2007    #: Return Type @;
2008    sub GetUsefulCrossValues {
2009        # Get the parameters.
2010        my ($self, $sourceEntity, $relationship) = @_;
2011        # Declare the return variable.
2012        my @retVal = ();
2013        # Determine the target entity for the relationship. This is whichever entity is not
2014        # the source entity. So, if the source entity is the FROM, we'll get the name of
2015        # the TO, and vice versa.
2016        my $relStructure = $self->_GetStructure($relationship);
2017        my $targetEntityType = ($relStructure->{from} eq $sourceEntity ? "to" : "from");
2018        my $targetEntity = $relStructure->{$targetEntityType};
2019        # Get the field table for the entity.
2020        my $entityFields = $self->GetFieldTable($targetEntity);
2021        # The field table is a hash. The hash key is the field name. The hash value is a structure.
2022        # For the entity fields, the key aspect of the target structure is that the {relation} value
2023        # must match the entity name.
2024        my @fieldList = map { "$targetEntity($_)" } grep { $entityFields->{$_}->{relation} eq $targetEntity }
2025                            keys %{$entityFields};
2026        # Push the fields found onto the return variable.
2027        push @retVal, sort @fieldList;
2028        # Get the field table for the relationship.
2029        my $relationshipFields = $self->GetFieldTable($relationship);
2030        # Here we have a different rule. We want all the fields other than "from-link" and "to-link".
2031        # This may end up being an empty set.
2032        my @fieldList2 = map { "$relationship($_)" } grep { $_ ne "from-link" && $_ ne "to-link" }
2033                            keys %{$relationshipFields};
2034        # Push these onto the return list.
2035        push @retVal, sort @fieldList2;
2036        # Return the result.
2037        return @retVal;
2038    }
2039    
2040  =head2 Internal Utility Methods  =head2 Internal Utility Methods
2041    
2042  =head3 GetLoadStats  =head3 GetLoadStats
# Line 2062  Line 2243 
2243      return $objectData->{Relations};      return $objectData->{Relations};
2244  }  }
2245    
 =head3 GetFieldTable  
   
 Get the field structure for a specified entity or relationship.  
   
 This is an instance method.  
   
 =over 4  
   
 =item objectName  
   
 Name of the desired entity or relationship.  
   
 =item RETURN  
   
 The table containing the field descriptors for the specified object.  
   
 =back  
   
 =cut  
   
 sub _GetFieldTable {  
     # Get the parameters.  
     my ($self, $objectName) = @_;  
     # Get the descriptor from the metadata.  
     my $objectData = $self->_GetStructure($objectName);  
     # Return the object's field table.  
     return $objectData->{Fields};  
 }  
   
2246  =head3 ValidateFieldNames  =head3 ValidateFieldNames
2247    
2248  Determine whether or not the field names are valid. A description of the problems with the names  Determine whether or not the field names are valid. A description of the problems with the names

Legend:
Removed from v.1.33  
changed lines
  Added in v.1.40

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3