[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.33, Sat Jan 28 09:36:47 2006 UTC revision 1.39, Sun Mar 26 17:24:55 2006 UTC
# Line 877  Line 877 
877    
878  If multiple names are specified, then the query processor will automatically determine a  If multiple names are specified, then the query processor will automatically determine a
879  join path between the entities and relationships. The algorithm used is very simplistic.  join path between the entities and relationships. The algorithm used is very simplistic.
880  In particular, you can't specify any entity or relationship more than once, and if a  In particular, if a relationship is recursive, the path is determined by the order in which
881  relationship is recursive, the path is determined by the order in which the entity  the entity and the relationship appear. For example, consider a recursive relationship
882  and the relationship appear. For example, consider a recursive relationship B<IsParentOf>  B<IsParentOf> which relates B<People> objects to other B<People> objects. If the join path is
 which relates B<People> objects to other B<People> objects. If the join path is  
883  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,
884  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.
885    
886    If an entity or relationship is mentioned twice, the name for the second occurrence will
887    be suffixed with C<2>, the third occurrence will be suffixed with C<3>, and so forth. So,
888    for example, if we have C<['Feature', 'HasContig', 'Contig', 'HasContig']>, then the
889    B<to-link> field of the first B<HasContig> is specified as C<HasContig(to-link)>, while
890    the B<to-link> field of the second B<HasContig> is specified as C<HasContig2(to-link)>.
891    
892  =over 4  =over 4
893    
894  =item objectNames  =item objectNames
# Line 913  Line 918 
918  filter clause in general; however, odd things may happen if a sort field is from a secondary  filter clause in general; however, odd things may happen if a sort field is from a secondary
919  relation.  relation.
920    
921    Finally, you can limit the number of rows returned by adding a LIMIT clause. The LIMIT must
922    be the last thing in the filter clause, and it contains only the word "LIMIT" followed by
923    a positive number. So, for example
924    
925    C<< "Genome(genus) = ? ORDER BY Genome(species) LIMIT 10" >>
926    
927    will only return the first ten genomes for the specified genus. The ORDER BY clause is not
928    required. For example, to just get the first 10 genomes in the B<Genome> table, you could
929    use
930    
931    C<< "LIMIT 10" >>
932    
933  =item param1, param2, ..., paramN  =item param1, param2, ..., paramN
934    
935  Parameter values to be substituted into the filter clause.  Parameter values to be substituted into the filter clause.
# Line 928  Line 945 
945  sub Get {  sub Get {
946      # Get the parameters.      # Get the parameters.
947      my ($self, $objectNames, $filterClause, @params) = @_;      my ($self, $objectNames, $filterClause, @params) = @_;
948        # Adjust the list of object names to account for multiple occurrences of the
949        # same object. We start with a hash table keyed on object name that will
950        # return the object suffix. The first time an object is encountered it will
951        # not be found in the hash. The next time the hash will map the object name
952        # to 2, then 3, and so forth.
953        my %objectHash = ();
954        # This list will contain the object names as they are to appear in the
955        # FROM list.
956        my @fromList = ();
957        # This list contains the suffixed object name for each object. It is exactly
958        # parallel to the list in the $objectNames parameter.
959        my @mappedNameList = ();
960        # Finally, this hash translates from a mapped name to its original object name.
961        my %mappedNameHash = ();
962        # Now we create the lists. Note that for every single name we push something into
963        # @fromList and @mappedNameList. This insures that those two arrays are exactly
964        # parallel to $objectNames.
965        for my $objectName (@{$objectNames}) {
966            # Get the next suffix for this object.
967            my $suffix = $objectHash{$objectName};
968            if (! $suffix) {
969                # Here we are seeing the object for the first time. The object name
970                # is used as is.
971                push @mappedNameList, $objectName;
972                push @fromList, $objectName;
973                $mappedNameHash{$objectName} = $objectName;
974                # Denote the next suffix will be 2.
975                $objectHash{$objectName} = 2;
976            } else {
977                # Here we've seen the object before. We construct a new name using
978                # the suffix from the hash and update the hash.
979                my $mappedName = "$objectName$suffix";
980                $objectHash{$objectName} = $suffix + 1;
981                # The FROM list has the object name followed by the mapped name. This
982                # tells SQL it's still the same table, but we're using a different name
983                # for it to avoid confusion.
984                push @fromList, "$objectName $mappedName";
985                # The mapped-name list contains the real mapped name.
986                push @mappedNameList, $mappedName;
987                # Finally, enable us to get back from the mapped name to the object name.
988                $mappedNameHash{$mappedName} = $objectName;
989            }
990        }
991      # Construct the SELECT statement. The general pattern is      # Construct the SELECT statement. The general pattern is
992      #      #
993      # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN      # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN
994      #      #
995      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
996      my $command = "SELECT DISTINCT " . join('.*, ', @{$objectNames}) . ".* FROM " .      my $command = "SELECT DISTINCT " . join('.*, ', @mappedNameList) . ".* FROM " .
997                  join(', ', @{$objectNames});                  join(', ', @fromList);
998      # Check for a filter clause.      # Check for a filter clause.
999      if ($filterClause) {      if ($filterClause) {
1000          # Here we have one, so we convert its field names and add it to the query. First,          # Here we have one, so we convert its field names and add it to the query. First,
# Line 942  Line 1002 
1002          my $filterString = $filterClause;          my $filterString = $filterClause;
1003          # Next, we sort the object names by length. This helps protect us from finding          # Next, we sort the object names by length. This helps protect us from finding
1004          # object names inside other object names when we're doing our search and replace.          # object names inside other object names when we're doing our search and replace.
1005          my @sortedNames = sort { length($b) - length($a) } @{$objectNames};          my @sortedNames = sort { length($b) - length($a) } @mappedNameList;
1006          # We will also keep a list of conditions to add to the WHERE clause in order to link          # We will also keep a list of conditions to add to the WHERE clause in order to link
1007          # entities and relationships as well as primary relations to secondary ones.          # entities and relationships as well as primary relations to secondary ones.
1008          my @joinWhere = ();          my @joinWhere = ();
1009          # The final preparatory step is to create a hash table of relation names. The          # The final preparatory step is to create a hash table of relation names. The
1010          # table begins with the relation names already in the SELECT command.          # table begins with the relation names already in the SELECT command. We may
1011          my %fromNames = ();          # need to add relations later if there is filtering on a field in a secondary
1012          for my $objectName (@sortedNames) {          # relation. The secondary relations are the ones that contain multiply-
1013              $fromNames{$objectName} = 1;          # occurring or optional fields.
1014          }          my %fromNames = map { $_ => 1 } @sortedNames;
1015          # We are ready to begin. We loop through the object names, replacing each          # We are ready to begin. We loop through the object names, replacing each
1016          # object name's field references by the corresponding SQL field reference.          # object name's field references by the corresponding SQL field reference.
1017          # Along the way, if we find a secondary relation, we will need to add it          # Along the way, if we find a secondary relation, we will need to add it
1018          # to the FROM clause.          # to the FROM clause.
1019          for my $objectName (@sortedNames) {          for my $mappedName (@sortedNames) {
1020              # Get the length of the object name plus 2. This is the value we add to the              # Get the length of the object name plus 2. This is the value we add to the
1021              # size of the field name to determine the size of the field reference as a              # size of the field name to determine the size of the field reference as a
1022              # whole.              # whole.
1023              my $nameLength = 2 + length $objectName;              my $nameLength = 2 + length $mappedName;
1024                # Get the real object name for this mapped name.
1025                my $objectName = $mappedNameHash{$mappedName};
1026              # Get the object's field list.              # Get the object's field list.
1027              my $fieldList = $self->_GetFieldTable($objectName);              my $fieldList = $self->GetFieldTable($objectName);
1028              # Find the field references for this object.              # Find the field references for this object.
1029              while ($filterString =~ m/$objectName\(([^)]*)\)/g) {              while ($filterString =~ m/$mappedName\(([^)]*)\)/g) {
1030                  # At this point, $1 contains the field name, and the current position                  # At this point, $1 contains the field name, and the current position
1031                  # is set immediately after the final parenthesis. We pull out the name of                  # is set immediately after the final parenthesis. We pull out the name of
1032                  # the field and the position and length of the field reference as a whole.                  # the field and the position and length of the field reference as a whole.
# Line 977  Line 1039 
1039                  } else {                  } else {
1040                      # Get the field's relation.                      # Get the field's relation.
1041                      my $relationName = $fieldList->{$fieldName}->{relation};                      my $relationName = $fieldList->{$fieldName}->{relation};
1042                        # Now we have a secondary relation. We need to insure it matches the
1043                        # mapped name of the primary relation. First we peel off the suffix
1044                        # from the mapped name.
1045                        my $mappingSuffix = substr $mappedName, length($objectName);
1046                        # Put the mapping suffix onto the relation name to get the
1047                        # mapped relation name.
1048                        my $mappedRelationName = "$relationName$mappingSuffix";
1049                      # Insure the relation is in the FROM clause.                      # Insure the relation is in the FROM clause.
1050                      if (!exists $fromNames{$relationName}) {                      if (!exists $fromNames{$mappedRelationName}) {
1051                          # Add the relation to the FROM clause.                          # Add the relation to the FROM clause.
1052                            if ($mappedRelationName eq $relationName) {
1053                                # The name is un-mapped, so we add it without
1054                                # any frills.
1055                          $command .= ", $relationName";                          $command .= ", $relationName";
                         # Create its join sub-clause.  
1056                          push @joinWhere, "$objectName.id = $relationName.id";                          push @joinWhere, "$objectName.id = $relationName.id";
1057                          # Denote we have it available for future fields.                          } else {
1058                          $fromNames{$relationName} = 1;                              # Here we have a mapping situation.
1059                                $command .= ", $relationName $mappedRelationName";
1060                                push @joinWhere, "$mappedRelationName.id = $mappedName.id";
1061                            }
1062                            # Denote we have this relation available for future fields.
1063                            $fromNames{$mappedRelationName} = 1;
1064                      }                      }
1065                      # Form an SQL field reference from the relation name and the field name.                      # Form an SQL field reference from the relation name and the field name.
1066                      my $sqlReference = "$relationName." . _FixName($fieldName);                      my $sqlReference = "$mappedRelationName." . _FixName($fieldName);
1067                      # Put it into the filter string in place of the old value.                      # Put it into the filter string in place of the old value.
1068                      substr($filterString, $pos, $len) = $sqlReference;                      substr($filterString, $pos, $len) = $sqlReference;
1069                      # Reposition the search.                      # Reposition the search.
# Line 999  Line 1075 
1075          # is more than one object in the object list. We start with the first object and          # is more than one object in the object list. We start with the first object and
1076          # run through the objects after it. Note also that we make a safety copy of the          # run through the objects after it. Note also that we make a safety copy of the
1077          # list before running through it.          # list before running through it.
1078          my @objectList = @{$objectNames};          my @mappedObjectList = @mappedNameList;
1079          my $lastObject = shift @objectList;          my $lastMappedObject = shift @mappedObjectList;
1080          # Get the join table.          # Get the join table.
1081          my $joinTable = $self->{_metaData}->{Joins};          my $joinTable = $self->{_metaData}->{Joins};
1082          # Loop through the object list.          # Loop through the object list.
1083          for my $thisObject (@objectList) {          for my $thisMappedObject (@mappedObjectList) {
1084              # Look for a join.              # Look for a join using the real object names.
1085                my $lastObject = $mappedNameHash{$lastMappedObject};
1086                my $thisObject = $mappedNameHash{$thisMappedObject};
1087              my $joinKey = "$lastObject/$thisObject";              my $joinKey = "$lastObject/$thisObject";
1088              if (!exists $joinTable->{$joinKey}) {              if (!exists $joinTable->{$joinKey}) {
1089                  # Here there's no join, so we throw an error.                  # Here there's no join, so we throw an error.
1090                  Confess("No join exists to connect from $lastObject to $thisObject.");                  Confess("No join exists to connect from $lastMappedObject to $thisMappedObject.");
1091              } else {              } else {
1092                  # Get the join clause and add it to the WHERE list.                  # Get the join clause.
1093                  push @joinWhere, $joinTable->{$joinKey};                  my $unMappedJoin = $joinTable->{$joinKey};
1094                    # Fix the names.
1095                    $unMappedJoin =~ s/$lastObject/$lastMappedObject/;
1096                    $unMappedJoin =~ s/$thisObject/$thisMappedObject/;
1097                    push @joinWhere, $unMappedJoin;
1098                  # Save this object as the last object for the next iteration.                  # Save this object as the last object for the next iteration.
1099                  $lastObject = $thisObject;                  $lastMappedObject = $thisMappedObject;
1100              }              }
1101          }          }
1102          # Now we need to handle the whole ORDER BY / LIMIT thing. The important part          # Now we need to handle the whole ORDER BY / LIMIT thing. The important part
# Line 1046  Line 1128 
1128      my $sth = $dbh->prepare_command($command);      my $sth = $dbh->prepare_command($command);
1129      # Execute it with the parameters bound in.      # Execute it with the parameters bound in.
1130      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());
1131        # Now we create the relation map, which enables DBQuery to determine the order, name
1132        # and mapped name for each object in the query.
1133        my @relationMap = ();
1134        for my $mappedName (@mappedNameList) {
1135            push @relationMap, [$mappedName, $mappedNameHash{$mappedName}];
1136        }
1137      # Return the statement object.      # Return the statement object.
1138      my $retVal = DBQuery::_new($self, $sth, @{$objectNames});      my $retVal = DBQuery::_new($self, $sth, \@relationMap);
1139      return $retVal;      return $retVal;
1140  }  }
1141    
# Line 1146  Line 1234 
1234                          # the current entity, so we need to stack it.                          # the current entity, so we need to stack it.
1235                          my @stackList = (@augmentedList, $toEntity);                          my @stackList = (@augmentedList, $toEntity);
1236                          push @fromPathList, \@stackList;                          push @fromPathList, \@stackList;
1237                        } else {
1238                            Trace("$toEntity ignored because it occurred previously.") if T(4);
1239                      }                      }
1240                  }                  }
1241              }              }
# Line 1171  Line 1261 
1261      for my $keyName ('to_link', 'from_link') {      for my $keyName ('to_link', 'from_link') {
1262          # Get the list for this key.          # Get the list for this key.
1263          my @pathList = @{$stackList{$keyName}};          my @pathList = @{$stackList{$keyName}};
1264            Trace(scalar(@pathList) . " entries in path list for $keyName.") if T(3);
1265          # Loop through this list.          # Loop through this list.
1266          while (my $path = pop @pathList) {          while (my $path = pop @pathList) {
1267              # Get the table whose rows are to be deleted.              # Get the table whose rows are to be deleted.
1268              my @pathTables = @{$path};              my @pathTables = @{$path};
1269              # Start the DELETE statement.              # Start the DELETE statement. We need to call DBKernel because the
1270                # syntax of a DELETE-USING varies among DBMSs.
1271              my $target = $pathTables[$#pathTables];              my $target = $pathTables[$#pathTables];
1272              my $stmt = "DELETE FROM $target";              my $stmt = $db->SetUsing(@pathTables);
             # If there's more than just the one table, we need a USING clause.  
             if (@pathTables > 1) {  
                 $stmt .= " USING " . join(", ", @pathTables[0 .. ($#pathTables - 1)]);  
             }  
1273              # Now start the WHERE. The first thing is the ID field from the starting table. That              # Now start the WHERE. The first thing is the ID field from the starting table. That
1274              # starting table will either be the entity relation or one of the entity's              # starting table will either be the entity relation or one of the entity's
1275              # sub-relations.              # sub-relations.
# Line 1191  Line 1279 
1279                  # Connect the current relationship to the preceding entity.                  # Connect the current relationship to the preceding entity.
1280                  my ($entity, $rel) = @pathTables[$i-1,$i];                  my ($entity, $rel) = @pathTables[$i-1,$i];
1281                  # The style of connection depends on the direction of the relationship.                  # The style of connection depends on the direction of the relationship.
1282                  $stmt .= " AND $entity.id = $rel.from_link";                  $stmt .= " AND $entity.id = $rel.$keyName";
1283                  if ($i + 1 <= $#pathTables) {                  if ($i + 1 <= $#pathTables) {
1284                      # Here there's a next entity, so connect that to the relationship's                      # Here there's a next entity, so connect that to the relationship's
1285                      # to-link.                      # to-link.
1286                      my $entity2 = $pathTables[$i+1];                      my $entity2 = $pathTables[$i+1];
1287                      $stmt .= " AND $rel.$keyName = $entity2.id";                      $stmt .= " AND $rel.to_link = $entity2.id";
1288                  }                  }
1289              }              }
1290              # Now we have our desired DELETE statement.              # Now we have our desired DELETE statement.
# Line 1206  Line 1294 
1294              } else {              } else {
1295                  # Here we can delete. Note that the SQL method dies with a confessing                  # Here we can delete. Note that the SQL method dies with a confessing
1296                  # if an error occurs, so we just go ahead and do it.                  # if an error occurs, so we just go ahead and do it.
1297                  Trace("Executing delete: $stmt") if T(3);                  Trace("Executing delete from $target using '$objectID'.") if T(3);
1298                  my $rv = $db->SQL($stmt, 0, [$objectID]);                  my $rv = $db->SQL($stmt, 0, $objectID);
1299                  # Accumulate the statistics for this delete. The only rows deleted                  # Accumulate the statistics for this delete. The only rows deleted
1300                  # are from the target table, so we use its name to record the                  # are from the target table, so we use its name to record the
1301                  # statistic.                  # statistic.
# Line 1856  Line 1944 
1944      return $retVal;      return $retVal;
1945  }  }
1946    
1947    =head3 GetFieldTable
1948    
1949    C<< my $fieldHash = $self->GetFieldTable($objectnName); >>
1950    
1951    Get the field structure for a specified entity or relationship.
1952    
1953    =over 4
1954    
1955    =item objectName
1956    
1957    Name of the desired entity or relationship.
1958    
1959    =item RETURN
1960    
1961    The table containing the field descriptors for the specified object.
1962    
1963    =back
1964    
1965    =cut
1966    
1967    sub GetFieldTable {
1968        # Get the parameters.
1969        my ($self, $objectName) = @_;
1970        # Get the descriptor from the metadata.
1971        my $objectData = $self->_GetStructure($objectName);
1972        # Return the object's field table.
1973        return $objectData->{Fields};
1974    }
1975    
1976    =head3 GetUsefulCrossValues
1977    
1978    C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >>
1979    
1980    Return a list of the useful attributes that would be returned by a B<Cross> call
1981    from an entity of the source entity type through the specified relationship. This
1982    means it will return the fields of the target entity type and the intersection data
1983    fields in the relationship. Only primary table fields are returned. In other words,
1984    the field names returned will be for fields where there is always one and only one
1985    value.
1986    
1987    =over 4
1988    
1989    =item sourceEntity
1990    
1991    Name of the entity from which the relationship crossing will start.
1992    
1993    =item relationship
1994    
1995    Name of the relationship being crossed.
1996    
1997    =item RETURN
1998    
1999    Returns a list of field names in Sprout field format (I<objectName>C<(>I<fieldName>C<)>.
2000    
2001    =back
2002    
2003    =cut
2004    #: Return Type @;
2005    sub GetUsefulCrossValues {
2006        # Get the parameters.
2007        my ($self, $sourceEntity, $relationship) = @_;
2008        # Declare the return variable.
2009        my @retVal = ();
2010        # Determine the target entity for the relationship. This is whichever entity is not
2011        # the source entity. So, if the source entity is the FROM, we'll get the name of
2012        # the TO, and vice versa.
2013        my $relStructure = $self->_GetStructure($relationship);
2014        my $targetEntityType = ($relStructure->{from} eq $sourceEntity ? "to" : "from");
2015        my $targetEntity = $relStructure->{$targetEntityType};
2016        # Get the field table for the entity.
2017        my $entityFields = $self->GetFieldTable($targetEntity);
2018        # The field table is a hash. The hash key is the field name. The hash value is a structure.
2019        # For the entity fields, the key aspect of the target structure is that the {relation} value
2020        # must match the entity name.
2021        my @fieldList = map { "$targetEntity($_)" } grep { $entityFields->{$_}->{relation} eq $targetEntity }
2022                            keys %{$entityFields};
2023        # Push the fields found onto the return variable.
2024        push @retVal, sort @fieldList;
2025        # Get the field table for the relationship.
2026        my $relationshipFields = $self->GetFieldTable($relationship);
2027        # Here we have a different rule. We want all the fields other than "from-link" and "to-link".
2028        # This may end up being an empty set.
2029        my @fieldList2 = map { "$relationship($_)" } grep { $_ ne "from-link" && $_ ne "to-link" }
2030                            keys %{$relationshipFields};
2031        # Push these onto the return list.
2032        push @retVal, sort @fieldList2;
2033        # Return the result.
2034        return @retVal;
2035    }
2036    
2037  =head2 Internal Utility Methods  =head2 Internal Utility Methods
2038    
2039  =head3 GetLoadStats  =head3 GetLoadStats
# Line 2062  Line 2240 
2240      return $objectData->{Relations};      return $objectData->{Relations};
2241  }  }
2242    
 =head3 GetFieldTable  
   
 Get the field structure for a specified entity or relationship.  
   
 This is an instance method.  
   
 =over 4  
   
 =item objectName  
   
 Name of the desired entity or relationship.  
   
 =item RETURN  
   
 The table containing the field descriptors for the specified object.  
   
 =back  
   
 =cut  
   
 sub _GetFieldTable {  
     # Get the parameters.  
     my ($self, $objectName) = @_;  
     # Get the descriptor from the metadata.  
     my $objectData = $self->_GetStructure($objectName);  
     # Return the object's field table.  
     return $objectData->{Fields};  
 }  
   
2243  =head3 ValidateFieldNames  =head3 ValidateFieldNames
2244    
2245  Determine whether or not the field names are valid. A description of the problems with the names  Determine whether or not the field names are valid. A description of the problems with the names

Legend:
Removed from v.1.33  
changed lines
  Added in v.1.39

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3