[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.18, Sun Aug 14 23:32:08 2005 UTC revision 1.30, Wed Jan 11 19:39:03 2006 UTC
# Line 9  Line 9 
9      use DBObject;      use DBObject;
10      use Stats;      use Stats;
11      use Time::HiRes qw(gettimeofday);      use Time::HiRes qw(gettimeofday);
12        use FIG;
13    
14  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
15    
# Line 507  Line 508 
508          # Separate out the source, the target, and the join clause.          # Separate out the source, the target, and the join clause.
509          $joinKey =~ m!^([^/]+)/(.+)$!;          $joinKey =~ m!^([^/]+)/(.+)$!;
510          my ($sourceRelation, $targetRelation) = ($1, $2);          my ($sourceRelation, $targetRelation) = ($1, $2);
511          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(4);          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(Joins => 4);
512          my $source = $self->ComputeObjectSentence($sourceRelation);          my $source = $self->ComputeObjectSentence($sourceRelation);
513          my $target = $self->ComputeObjectSentence($targetRelation);          my $target = $self->ComputeObjectSentence($targetRelation);
514          my $clause = $joinTable->{$joinKey};          my $clause = $joinTable->{$joinKey};
# Line 552  Line 553 
553  sub CreateTables {  sub CreateTables {
554      # Get the parameters.      # Get the parameters.
555      my ($self) = @_;      my ($self) = @_;
556      my $metadata = $self->{_metaData};      # Get the relation names.
557      my $dbh = $self->{_dbh};      my @relNames = $self->GetTableNames();
558      # Loop through the entities.      # Loop through the relations.
559      my $entityHash = $metadata->{Entities};      for my $relationName (@relNames) {
     for my $entityName (keys %{$entityHash}) {  
         my $entityData = $entityHash->{$entityName};  
         # Tell the user what we're doing.  
         Trace("Creating relations for entity $entityName.") if T(1);  
         # Loop through the entity's relations.  
         for my $relationName (keys %{$entityData->{Relations}}) {  
560              # Create a table for this relation.              # Create a table for this relation.
561              $self->CreateTable($relationName);              $self->CreateTable($relationName);
562              Trace("Relation $relationName created.") if T(1);          Trace("Relation $relationName created.") if T(2);
         }  
     }  
     # Loop through the relationships.  
     my $relationshipTable = $metadata->{Relationships};  
     for my $relationshipName (keys %{$metadata->{Relationships}}) {  
         # Create a table for this relationship.  
         Trace("Creating relationship $relationshipName.") if T(1);  
         $self->CreateTable($relationshipName);  
563      }      }
564  }  }
565    
# Line 673  Line 660 
660          # Get the index's uniqueness flag.          # Get the index's uniqueness flag.
661          my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');          my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');
662          # Create the index.          # Create the index.
663          $dbh->create_index(idx => $indexName, tbl => $relationName, flds => $flds, unique => $unique);          my $rv = $dbh->create_index(idx => $indexName, tbl => $relationName,
664                                        flds => $flds, unique => $unique);
665            if ($rv) {
666          Trace("Index created: $indexName for $relationName ($flds)") if T(1);          Trace("Index created: $indexName for $relationName ($flds)") if T(1);
667            } else {
668                Confess("Error creating index $indexName for $relationName using ($flds): " . $dbh->error_message());
669            }
670      }      }
671  }  }
672    
# Line 723  Line 715 
715      $directoryName =~ s!/\\$!!;      $directoryName =~ s!/\\$!!;
716      # Declare the return variable.      # Declare the return variable.
717      my $retVal = Stats->new();      my $retVal = Stats->new();
718      # Get the metadata structure.      # Get the relation names.
719      my $metaData = $self->{_metaData};      my @relNames = $self->GetTableNames();
720      # Loop through the entities.      for my $relationName (@relNames) {
     for my $entity (values %{$metaData->{Entities}}) {  
         # Loop through the entity's relations.  
         for my $relationName (keys %{$entity->{Relations}}) {  
721              # Try to load this relation.              # Try to load this relation.
722              my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);              my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);
723              # Accumulate the statistics.              # Accumulate the statistics.
724              $retVal->Accumulate($result);              $retVal->Accumulate($result);
725          }          }
     }  
     # Loop through the relationships.  
     for my $relationshipName (keys %{$metaData->{Relationships}}) {  
         # Try to load this relationship's relation.  
         my $result = $self->_LoadRelation($directoryName, $relationshipName, $rebuild);  
         # Accumulate the statistics.  
         $retVal->Accumulate($result);  
     }  
726      # Add the duration of the load to the statistical object.      # Add the duration of the load to the statistical object.
727      $retVal->Add('duration', gettimeofday - $startTime);      $retVal->Add('duration', gettimeofday - $startTime);
728      # Return the accumulated statistics.      # Return the accumulated statistics.
729      return $retVal;      return $retVal;
730  }  }
731    
732    
733  =head3 GetTableNames  =head3 GetTableNames
734    
735  C<< my @names = $erdb->GetTableNames; >>  C<< my @names = $erdb->GetTableNames; >>
# Line 782  Line 764 
764      return sort keys %{$entityList};      return sort keys %{$entityList};
765  }  }
766    
767    =head3 IsEntity
768    
769    C<< my $flag = $erdb->IsEntity($entityName); >>
770    
771    Return TRUE if the parameter is an entity name, else FALSE.
772    
773    =over 4
774    
775    =item entityName
776    
777    Object name to be tested.
778    
779    =item RETURN
780    
781    Returns TRUE if the specified string is an entity name, else FALSE.
782    
783    =back
784    
785    =cut
786    
787    sub IsEntity {
788        # Get the parameters.
789        my ($self, $entityName) = @_;
790        # Test to see if it's an entity.
791        return exists $self->{_metaData}->{Entities}->{$entityName};
792    }
793    
794  =head3 Get  =head3 Get
795    
796  C<< my $query = $erdb->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my $query = $erdb->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
# Line 842  Line 851 
851    
852  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>
853    
854    Note that the case is important. Only an uppercase "ORDER BY" with a single space will
855    be processed. The idea is to make it less likely to find the verb by accident.
856    
857  The rules for field references in a sort order are the same as those for field references in the  The rules for field references in a sort order are the same as those for field references in the
858  filter clause in general; however, odd things may happen if a sort field is from a secondary  filter clause in general; however, odd things may happen if a sort field is from a secondary
859  relation.  relation.
# Line 950  Line 962 
962                  $lastObject = $thisObject;                  $lastObject = $thisObject;
963              }              }
964          }          }
965          # Now we need to handle the whole ORDER BY thing. We'll put the order by clause          # Now we need to handle the whole ORDER BY / LIMIT thing. The important part
966          # in the following variable.          # here is we want the filter clause to be empty if there's no WHERE filter.
967            # We'll put the ORDER BY / LIMIT clauses in the following variable.
968          my $orderClause = "";          my $orderClause = "";
969          # Locate the ORDER BY verb (if any).          # Locate the ORDER BY or LIMIT verbs (if any). We use a non-greedy
970          if ($filterString =~ m/^(.*)ORDER BY/g) {          # operator so that we find the first occurrence of either verb.
971              # Here we have an ORDER BY verb. Split it off of the filter string.          if ($filterString =~ m/^(.*?)\s*(ORDER BY|LIMIT)/g) {
972                # Here we have an ORDER BY or LIMIT verb. Split it off of the filter string.
973              my $pos = pos $filterString;              my $pos = pos $filterString;
974              $orderClause = substr($filterString, $pos);              $orderClause = $2 . substr($filterString, $pos);
975              $filterString = $1;              $filterString = $1;
976          }          }
977          # Add the filter and the join clauses (if any) to the SELECT command.          # Add the filter and the join clauses (if any) to the SELECT command.
# Line 967  Line 981 
981          if (@joinWhere) {          if (@joinWhere) {
982              $command .= " WHERE " . join(' AND ', @joinWhere);              $command .= " WHERE " . join(' AND ', @joinWhere);
983          }          }
984          # Add the sort clause (if any) to the SELECT command.          # Add the sort or limit clause (if any) to the SELECT command.
985          if ($orderClause) {          if ($orderClause) {
986              $command .= " ORDER BY $orderClause";              $command .= " $orderClause";
987          }          }
988      }      }
989      Trace("SQL query: $command") if T(3);      Trace("SQL query: $command") if T(SQL => 4);
990      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(4) && (@params > 0));      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(SQL => 4) && (@params > 0));
991      my $sth = $dbh->prepare_command($command);      my $sth = $dbh->prepare_command($command);
992      # Execute it with the parameters bound in.      # Execute it with the parameters bound in.
993      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());      $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());
# Line 1284  Line 1298 
1298    
1299  =item RETURN  =item RETURN
1300    
1301  Returns a statistical object containing the number of records read and a list of  Returns a statistical object containing a list of the error messages.
 the error messages.  
1302    
1303  =back  =back
1304    
# Line 1299  Line 1312 
1312      Trace("Loading table $relationName from $fileName") if T(2);      Trace("Loading table $relationName from $fileName") if T(2);
1313      # Get the database handle.      # Get the database handle.
1314      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
1315        # Get the input file size.
1316        my $fileSize = -s $fileName;
1317      # Get the relation data.      # Get the relation data.
1318      my $relation = $self->_FindRelation($relationName);      my $relation = $self->_FindRelation($relationName);
1319      # Check the truncation flag.      # Check the truncation flag.
1320      if ($truncateFlag) {      if ($truncateFlag) {
1321          Trace("Creating table $relationName") if T(2);          Trace("Creating table $relationName") if T(2);
1322            # Compute the row count estimate. We take the size of the load file,
1323            # divide it by the estimated row size, and then multiply by 1.5 to
1324            # leave extra room. We postulate a minimum row count of 1000 to
1325            # prevent problems with incoming empty load files.
1326            my $rowSize = $self->EstimateRowSize($relationName);
1327            my $estimate = FIG::max($fileSize * 1.5 / $rowSize, 1000);
1328          # Re-create the table without its index.          # Re-create the table without its index.
1329          $self->CreateTable($relationName, 0);          $self->CreateTable($relationName, 0, $estimate);
1330          # If this is a pre-index DBMS, create the index here.          # If this is a pre-index DBMS, create the index here.
1331          if ($dbh->{_preIndex}) {          if ($dbh->{_preIndex}) {
1332              eval {              eval {
# Line 1316  Line 1337 
1337              }              }
1338          }          }
1339      }      }
     # Determine whether or not this is a primary relation. Primary relations have an extra  
     # field indicating whether or not a given object is new or was loaded from the flat files.  
     my $primary = $self->_IsPrimary($relationName);  
     # Get the number of fields in this relation.  
     my @fieldList = @{$relation->{Fields}};  
     my $fieldCount = @fieldList;  
     # Start a database transaction.  
     $dbh->begin_tran;  
     # Open the relation file. We need to create a cleaned-up copy before loading.  
     open TABLEIN, '<', $fileName;  
     my $tempName = "$fileName.tbl";  
     open TABLEOUT, '>', $tempName;  
     my $inputCount = 0;  
     # Loop through the file.  
     while (<TABLEIN>) {  
         $inputCount++;  
         # Chop off the new-line character.  
         my $record = Tracer::Strip($_);  
         # Only proceed if the record is non-blank.  
         if ($record) {  
             # Escape all the backslashes found in the line.  
             $record =~ s/\\/\\\\/g;  
             # Insure the number of fields is correct.  
             my @fields = split /\t/, $record;  
             while (@fields > $fieldCount) {  
                 my $extraField = $fields[$#fields];  
                 delete $fields[$#fields];  
                 if ($extraField) {  
                     Trace("Nonblank extra field value \"$extraField\" deleted from record $inputCount of $fileName.") if T(1);  
                 }  
             }  
             while (@fields < $fieldCount) {  
                 push @fields, "";  
             }  
             # If this is a primary relation, add a 0 for the new-record flag (indicating that  
             # this record is not new, but part of the original load).  
             if ($primary) {  
                 push @fields, "0";  
             }  
             # Write the record.  
             $record = join "\t", @fields;  
             print TABLEOUT "$record\n";  
             # Count the record written.  
             my $count = $retVal->Add('records');  
             my $len = length $record;  
             Trace("Record $count written with $len characters.") if T(4);  
         } else {  
             # Here we have a blank record.  
             $retVal->Add('skipped');  
         }  
     }  
     # Close the files.  
     close TABLEIN;  
     close TABLEOUT;  
     Trace("Temporary file $tempName created.") if T(2);  
1340      # Load the table.      # Load the table.
1341      my $rv;      my $rv;
1342      eval {      eval {
1343          $rv = $dbh->load_table(file => $tempName, tbl => $relationName);          $rv = $dbh->load_table(file => $fileName, tbl => $relationName);
1344      };      };
1345      if (!defined $rv) {      if (!defined $rv) {
1346          $retVal->AddMessage($@) if ($@);          $retVal->AddMessage($@) if ($@);
1347          $retVal->AddMessage("Table load failed for $relationName using $tempName.");          $retVal->AddMessage("Table load failed for $relationName using $fileName.");
1348          Trace("Table load failed for $relationName.") if T(1);          Trace("Table load failed for $relationName.") if T(1);
1349      } else {      } else {
1350          # Here we successfully loaded the table. Trace the number of records loaded.          # Here we successfully loaded the table.
1351          Trace("$retVal->{records} records read for $relationName.") if T(2);          $retVal->Add("tables");
1352            my $size = -s $fileName;
1353            Trace("$size bytes loaded into $relationName.") if T(2);
1354          # If we're rebuilding, we need to create the table indexes.          # If we're rebuilding, we need to create the table indexes.
1355          if ($truncateFlag && ! $dbh->{_preIndex}) {          if ($truncateFlag && ! $dbh->{_preIndex}) {
1356              eval {              eval {
# Line 1392  Line 1360 
1360                  $retVal->AddMessage($@);                  $retVal->AddMessage($@);
1361              }              }
1362          }          }
         # Analyze the table to help optimize tables.  
1363      }      }
1364      # Commit the database changes.      # Analyze the table to improve performance.
     $dbh->commit_tran;  
1365      $dbh->vacuum_it($relationName);      $dbh->vacuum_it($relationName);
     # Delete the temporary file.  
     unlink $tempName;  
1366      # Return the statistics.      # Return the statistics.
1367      return $retVal;      return $retVal;
1368  }  }
# Line 1607  Line 1571 
1571      } else {      } else {
1572          push @parmList, $parameterList;          push @parmList, $parameterList;
1573      }      }
     # Create the query.  
     my $query = $self->Get($objectNames, $filterClause, @parmList);  
     # Set up a counter of the number of records read.  
     my $fetched = 0;  
1574      # Insure the counter has a value.      # Insure the counter has a value.
1575      if (!defined $count) {      if (!defined $count) {
1576          $count = 0;          $count = 0;
1577      }      }
1578        # Add the row limit to the filter clause.
1579        if ($count > 0) {
1580            $filterClause .= " LIMIT $count";
1581        }
1582        # Create the query.
1583        my $query = $self->Get($objectNames, $filterClause, @parmList);
1584        # Set up a counter of the number of records read.
1585        my $fetched = 0;
1586      # Loop through the records returned, extracting the fields. Note that if the      # Loop through the records returned, extracting the fields. Note that if the
1587      # counter is non-zero, we stop when the number of records read hits the count.      # counter is non-zero, we stop when the number of records read hits the count.
1588      my @retVal = ();      my @retVal = ();
# Line 1676  Line 1644 
1644  =cut  =cut
1645    
1646  sub _GetLoadStats {  sub _GetLoadStats {
1647      return Stats->new('records');      return Stats->new();
1648  }  }
1649    
1650  =head3 GenerateFields  =head3 GenerateFields
# Line 2247  Line 2215 
2215          my @fromList = ();          my @fromList = ();
2216          my @toList = ();          my @toList = ();
2217          my @bothList = ();          my @bothList = ();
2218          Trace("Join table build for $entityName.") if T(4);          Trace("Join table build for $entityName.") if T(metadata => 4);
2219          for my $relationshipName (keys %{$relationshipList}) {          for my $relationshipName (keys %{$relationshipList}) {
2220              my $relationship = $relationshipList->{$relationshipName};              my $relationship = $relationshipList->{$relationshipName};
2221              # Determine if this relationship has our entity in one of its link fields.              # Determine if this relationship has our entity in one of its link fields.
# Line 2258  Line 2226 
2226                  if ($toEntity eq $entityName) {                  if ($toEntity eq $entityName) {
2227                      # Here the relationship is recursive.                      # Here the relationship is recursive.
2228                      push @bothList, $relationshipName;                      push @bothList, $relationshipName;
2229                      Trace("Relationship $relationshipName put in both-list.") if T(4);                      Trace("Relationship $relationshipName put in both-list.") if T(metadata => 4);
2230                  } else {                  } else {
2231                      # Here the relationship comes from the entity.                      # Here the relationship comes from the entity.
2232                      push @fromList, $relationshipName;                      push @fromList, $relationshipName;
2233                      Trace("Relationship $relationshipName put in from-list.") if T(4);                      Trace("Relationship $relationshipName put in from-list.") if T(metadata => 4);
2234                  }                  }
2235              } elsif ($toEntity eq $entityName) {              } elsif ($toEntity eq $entityName) {
2236                  # Here the relationship goes to the entity.                  # Here the relationship goes to the entity.
2237                  push @toList, $relationshipName;                  push @toList, $relationshipName;
2238                  Trace("Relationship $relationshipName put in to-list.") if T(4);                  Trace("Relationship $relationshipName put in to-list.") if T(metadata => 4);
2239              }              }
2240          }          }
2241          # Create the nonrecursive joins. Note that we build two hashes for running          # Create the nonrecursive joins. Note that we build two hashes for running
# Line 2283  Line 2251 
2251                  # Create joins between the entity and this relationship.                  # Create joins between the entity and this relationship.
2252                  my $linkField = "$relationshipName.${linkType}_link";                  my $linkField = "$relationshipName.${linkType}_link";
2253                  my $joinClause = "$entityName.id = $linkField";                  my $joinClause = "$entityName.id = $linkField";
2254                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(4);                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(metadata => 4);
2255                  $joinTable{"$entityName/$relationshipName"} = $joinClause;                  $joinTable{"$entityName/$relationshipName"} = $joinClause;
2256                  $joinTable{"$relationshipName/$entityName"} = $joinClause;                  $joinTable{"$relationshipName/$entityName"} = $joinClause;
2257                  # Create joins between this relationship and the other relationships.                  # Create joins between this relationship and the other relationships.
# Line 2304  Line 2272 
2272                              # relationship and itself are prohibited.                              # relationship and itself are prohibited.
2273                              my $relJoinClause = "$otherName.${otherType}_link = $linkField";                              my $relJoinClause = "$otherName.${otherType}_link = $linkField";
2274                              $joinTable{$joinKey} = $relJoinClause;                              $joinTable{$joinKey} = $relJoinClause;
2275                              Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(4);                              Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(metadata => 4);
2276                          }                          }
2277                      }                      }
2278                  }                  }
# Line 2313  Line 2281 
2281                  # relationship can only be ambiguous with another recursive relationship,                  # relationship can only be ambiguous with another recursive relationship,
2282                  # and the incoming relationship from the outer loop is never recursive.                  # and the incoming relationship from the outer loop is never recursive.
2283                  for my $otherName (@bothList) {                  for my $otherName (@bothList) {
2284                      Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(4);                      Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(metadata => 4);
2285                      # Join from the left.                      # Join from the left.
2286                      $joinTable{"$relationshipName/$otherName"} =                      $joinTable{"$relationshipName/$otherName"} =
2287                          "$linkField = $otherName.from_link";                          "$linkField = $otherName.from_link";
# Line 2328  Line 2296 
2296          # rise to situations where we can't create the path we want; however, it is always          # rise to situations where we can't create the path we want; however, it is always
2297          # possible to get the same effect using multiple queries.          # possible to get the same effect using multiple queries.
2298          for my $relationshipName (@bothList) {          for my $relationshipName (@bothList) {
2299              Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(4);              Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(metadata => 4);
2300              # Join to the entity from each direction.              # Join to the entity from each direction.
2301              $joinTable{"$entityName/$relationshipName"} =              $joinTable{"$entityName/$relationshipName"} =
2302                  "$entityName.id = $relationshipName.from_link";                  "$entityName.id = $relationshipName.from_link";

Legend:
Removed from v.1.18  
changed lines
  Added in v.1.30

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3