[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.23, Thu Sep 15 01:46:56 2005 UTC revision 1.70, Fri Oct 13 21:45:11 2006 UTC
# Line 9  Line 9 
9      use DBObject;      use DBObject;
10      use Stats;      use Stats;
11      use Time::HiRes qw(gettimeofday);      use Time::HiRes qw(gettimeofday);
12        use Digest::MD5 qw(md5_base64);
13      use FIG;      use FIG;
14    
15  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
# Line 90  Line 91 
91    
92  32-bit signed integer  32-bit signed integer
93    
94    =item counter
95    
96    32-bit unsigned integer
97    
98  =item date  =item date
99    
100  64-bit unsigned integer, representing a PERL date/time value  64-bit unsigned integer, representing a PERL date/time value
# Line 109  Line 114 
114  compatability with certain database packages), but the only values supported are  compatability with certain database packages), but the only values supported are
115  0 and 1.  0 and 1.
116    
117    =item id-string
118    
119    variable-length string, maximum 25 characters
120    
121  =item key-string  =item key-string
122    
123  variable-length string, maximum 40 characters  variable-length string, maximum 40 characters
# Line 125  Line 134 
134    
135  variable-length string, maximum 255 characters  variable-length string, maximum 255 characters
136    
137    =item hash-string
138    
139    variable-length string, maximum 22 characters
140    
141  =back  =back
142    
143    The hash-string data type has a special meaning. The actual key passed into the loader will
144    be a string, but it will be digested into a 22-character MD5 code to save space. Although the
145    MD5 algorithm is not perfect, it is extremely unlikely two strings will have the same
146    digest. Therefore, it is presumed the keys will be unique. When the database is actually
147    in use, the hashed keys will be presented rather than the original values. For this reason,
148    they should not be used for entities where the key is meaningful.
149    
150  =head3 Global Tags  =head3 Global Tags
151    
152  The entire database definition must be inside a B<Database> tag. The display name of  The entire database definition must be inside a B<Database> tag. The display name of
# Line 170  Line 190 
190    
191  Name of the field. The field name should contain only letters, digits, and hyphens (C<->),  Name of the field. The field name should contain only letters, digits, and hyphens (C<->),
192  and the first character should be a letter. Most underlying databases are case-insensitive  and the first character should be a letter. Most underlying databases are case-insensitive
193  with the respect to field names, so a best practice is to use lower-case letters only.  with the respect to field names, so a best practice is to use lower-case letters only. Finally,
194    the name C<search-relevance> has special meaning for full-text searches and should not be
195    used as a field name.
196    
197  =item type  =item type
198    
# Line 189  Line 211 
211  entity, the fields without a relation attribute are said to belong to the  entity, the fields without a relation attribute are said to belong to the
212  I<primary relation>. This relation has the same name as the entity itself.  I<primary relation>. This relation has the same name as the entity itself.
213    
214    =item searchable
215    
216    If specified, then the field is a candidate for full-text searching. A single full-text
217    index will be created for each relation with at least one searchable field in it.
218    For best results, this option should only be used for string or text fields.
219    
220  =back  =back
221    
222  =head3 Indexes  =head3 Indexes
# Line 302  Line 330 
330  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field
331  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation
332  # string is specified in the field definition. "avgLen" is the average byte length for estimating  # string is specified in the field definition. "avgLen" is the average byte length for estimating
333  # record sizes.  # record sizes. "sort" is the key modifier for the sort command.
334  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            avgLen =>   1, dataGen => "StringGen('A')" },  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            avgLen =>   1, sort => "",  dataGen => "StringGen('A')" },
335                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           avgLen =>   4, dataGen => "IntGen(0, 99999999)" },                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           avgLen =>   4, sort => "n", dataGen => "IntGen(0, 99999999)" },
336                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          avgLen => 100, dataGen => "StringGen(IntGen(10,250))" },                    counter => { sqlType => 'INTEGER UNSIGNED',   maxLen => 20,           avgLen =>   4, sort => "n", dataGen => "IntGen(0, 99999999)" },
337                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" },                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          avgLen => 100, sort => "",  dataGen => "StringGen(IntGen(10,250))" },
338                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" },                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   avgLen => 500, sort => "",  dataGen => "StringGen(IntGen(80,1000))" },
339                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, dataGen => "FloatGen(0.0, 100.0)" },                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, sort => "n", dataGen => "DateGen(-7, 7, IntGen(0,1400))" },
340                    boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   2, dataGen => "IntGen(0, 1)" },                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, sort => "g", dataGen => "FloatGen(0.0, 100.0)" },
341                      boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   1, sort => "n", dataGen => "IntGen(0, 1)" },
342                     'hash-string' =>
343                                 { sqlType => 'VARCHAR(22)',        maxLen => 22,           avgLen =>  22, sort => "",  dataGen => "SringGen(22)" },
344                     'id-string' =>
345                                 { sqlType => 'VARCHAR(25)',        maxLen => 25,           avgLen =>  25, sort => "",  dataGen => "SringGen(22)" },
346                   'key-string' =>                   'key-string' =>
347                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, dataGen => "StringGen(IntGen(10,40))" },                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, sort => "",  dataGen => "StringGen(IntGen(10,40))" },
348                   'name-string' =>                   'name-string' =>
349                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           avgLen =>  40, dataGen => "StringGen(IntGen(10,80))" },                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           avgLen =>  40, sort => "",  dataGen => "StringGen(IntGen(10,80))" },
350                   'medium-string' =>                   'medium-string' =>
351                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          avgLen =>  40, dataGen => "StringGen(IntGen(10,160))" },                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          avgLen =>  40, sort => "",  dataGen => "StringGen(IntGen(10,160))" },
352                  );                  );
353    
354  # Table translating arities into natural language.  # Table translating arities into natural language.
# Line 402  Line 435 
435      # Write the HTML heading stuff.      # Write the HTML heading stuff.
436      print HTMLOUT "<html>\n<head>\n<title>$title</title>\n";      print HTMLOUT "<html>\n<head>\n<title>$title</title>\n";
437      print HTMLOUT "</head>\n<body>\n";      print HTMLOUT "</head>\n<body>\n";
438        # Write the documentation.
439        print HTMLOUT $self->DisplayMetaData();
440        # Close the document.
441        print HTMLOUT "</body>\n</html>\n";
442        # Close the file.
443        close HTMLOUT;
444    }
445    
446    =head3 DisplayMetaData
447    
448    C<< my $html = $erdb->DisplayMetaData(); >>
449    
450    Return an HTML description of the database. This description can be used to help users create
451    the data to be loaded into the relations and form queries. The output is raw includable HTML
452    without any HEAD or BODY tags.
453    
454    =over 4
455    
456    =item filename
457    
458    The name of the output file.
459    
460    =back
461    
462    =cut
463    
464    sub DisplayMetaData {
465        # Get the parameters.
466        my ($self) = @_;
467        # Get the metadata and the title string.
468        my $metadata = $self->{_metaData};
469        # Get the title string.
470        my $title = $metadata->{Title};
471        # Get the entity and relationship lists.
472        my $entityList = $metadata->{Entities};
473        my $relationshipList = $metadata->{Relationships};
474        # Declare the return variable.
475        my $retVal = "";
476        # Open the output file.
477        Trace("Building MetaData table of contents.") if T(4);
478      # Here we do the table of contents. It starts as an unordered list of section names. Each      # Here we do the table of contents. It starts as an unordered list of section names. Each
479      # section contains an ordered list of entity or relationship subsections.      # section contains an ordered list of entity or relationship subsections.
480      print HTMLOUT "<ul>\n<li><a href=\"#EntitiesSection\">Entities</a>\n<ol>\n";      $retVal .= "<ul>\n<li><a href=\"#EntitiesSection\">Entities</a>\n<ol>\n";
481      # Loop through the Entities, displaying a list item for each.      # Loop through the Entities, displaying a list item for each.
482      foreach my $key (sort keys %{$entityList}) {      foreach my $key (sort keys %{$entityList}) {
483          # Display this item.          # Display this item.
484          print HTMLOUT "<li><a href=\"#$key\">$key</a></li>\n";          $retVal .= "<li><a href=\"#$key\">$key</a></li>\n";
485      }      }
486      # Close off the entity section and start the relationship section.      # Close off the entity section and start the relationship section.
487      print HTMLOUT "</ol></li>\n<li><a href=\"#RelationshipsSection\">Relationships</a>\n<ol>\n";      $retVal .= "</ol></li>\n<li><a href=\"#RelationshipsSection\">Relationships</a>\n<ol>\n";
488      # Loop through the Relationships.      # Loop through the Relationships.
489      foreach my $key (sort keys %{$relationshipList}) {      foreach my $key (sort keys %{$relationshipList}) {
490          # Display this item.          # Display this item.
491          my $relationshipTitle = _ComputeRelationshipSentence($key, $relationshipList->{$key});          my $relationshipTitle = _ComputeRelationshipSentence($key, $relationshipList->{$key});
492          print HTMLOUT "<li><a href=\"#$key\">$relationshipTitle</a></li>\n";          $retVal .= "<li><a href=\"#$key\">$relationshipTitle</a></li>\n";
493      }      }
494      # Close off the relationship section and list the join table section.      # Close off the relationship section and list the join table section.
495      print HTMLOUT "</ol></li>\n<li><a href=\"#JoinTable\">Join Table</a></li>\n";      $retVal .= "</ol></li>\n<li><a href=\"#JoinTable\">Join Table</a></li>\n";
496      # Close off the table of contents itself.      # Close off the table of contents itself.
497      print HTMLOUT "</ul>\n";      $retVal .=  "</ul>\n";
498      # Now we start with the actual data. Denote we're starting the entity section.      # Now we start with the actual data. Denote we're starting the entity section.
499      print HTMLOUT "<a name=\"EntitiesSection\"></a><h2>Entities</h2>\n";      $retVal .= "<a name=\"EntitiesSection\"></a><h2>Entities</h2>\n";
500      # Loop through the entities.      # Loop through the entities.
501      for my $key (sort keys %{$entityList}) {      for my $key (sort keys %{$entityList}) {
502          Trace("Building MetaData entry for $key entity.") if T(4);          Trace("Building MetaData entry for $key entity.") if T(4);
503          # Create the entity header. It contains a bookmark and the entity name.          # Create the entity header. It contains a bookmark and the entity name.
504          print HTMLOUT "<a name=\"$key\"></a><h3>$key</h3>\n";          $retVal .= "<a name=\"$key\"></a><h3>$key</h3>\n";
505          # Get the entity data.          # Get the entity data.
506          my $entityData = $entityList->{$key};          my $entityData = $entityList->{$key};
507          # If there's descriptive text, display it.          # If there's descriptive text, display it.
508          if (my $notes = $entityData->{Notes}) {          if (my $notes = $entityData->{Notes}) {
509              print HTMLOUT "<p>" . _HTMLNote($notes->{content}) . "</p>\n";              $retVal .= "<p>" . _HTMLNote($notes->{content}) . "</p>\n";
510          }          }
511          # Now we want a list of the entity's relationships. First, we set up the relationship subsection.          # Now we want a list of the entity's relationships. First, we set up the relationship subsection.
512          print HTMLOUT "<h4>Relationships for <b>$key</b></h4>\n<ul>\n";          $retVal .= "<h4>Relationships for <b>$key</b></h4>\n<ul>\n";
513          # Loop through the relationships.          # Loop through the relationships.
514          for my $relationship (sort keys %{$relationshipList}) {          for my $relationship (sort keys %{$relationshipList}) {
515              # Get the relationship data.              # Get the relationship data.
# Line 446  Line 519 
519                  # Get the relationship sentence and append the arity.                  # Get the relationship sentence and append the arity.
520                  my $relationshipDescription = _ComputeRelationshipSentence($relationship, $relationshipStructure);                  my $relationshipDescription = _ComputeRelationshipSentence($relationship, $relationshipStructure);
521                  # Display the relationship data.                  # Display the relationship data.
522                  print HTMLOUT "<li><a href=\"#$relationship\">$relationshipDescription</a></li>\n";                  $retVal .= "<li><a href=\"#$relationship\">$relationshipDescription</a></li>\n";
523              }              }
524          }          }
525          # Close off the relationship list.          # Close off the relationship list.
526          print HTMLOUT "</ul>\n";          $retVal .= "</ul>\n";
527          # Get the entity's relations.          # Get the entity's relations.
528          my $relationList = $entityData->{Relations};          my $relationList = $entityData->{Relations};
529          # Create a header for the relation subsection.          # Create a header for the relation subsection.
530          print HTMLOUT "<h4>Relations for <b>$key</b></h4>\n";          $retVal .= "<h4>Relations for <b>$key</b></h4>\n";
531          # Loop through the relations, displaying them.          # Loop through the relations, displaying them.
532          for my $relation (sort keys %{$relationList}) {          for my $relation (sort keys %{$relationList}) {
533              my $htmlString = _ShowRelationTable($relation, $relationList->{$relation});              my $htmlString = _ShowRelationTable($relation, $relationList->{$relation});
534              print HTMLOUT $htmlString;              $retVal .= $htmlString;
535          }          }
536      }      }
537      # Denote we're starting the relationship section.      # Denote we're starting the relationship section.
538      print HTMLOUT "<a name=\"RelationshipsSection\"></a><h2>Relationships</h2>\n";      $retVal .= "<a name=\"RelationshipsSection\"></a><h2>Relationships</h2>\n";
539      # Loop through the relationships.      # Loop through the relationships.
540      for my $key (sort keys %{$relationshipList}) {      for my $key (sort keys %{$relationshipList}) {
541          Trace("Building MetaData entry for $key relationship.") if T(4);          Trace("Building MetaData entry for $key relationship.") if T(4);
# Line 470  Line 543 
543          my $relationshipStructure = $relationshipList->{$key};          my $relationshipStructure = $relationshipList->{$key};
544          # Create the relationship header.          # Create the relationship header.
545          my $headerText = _ComputeRelationshipHeading($key, $relationshipStructure);          my $headerText = _ComputeRelationshipHeading($key, $relationshipStructure);
546          print HTMLOUT "<h3><a name=\"$key\"></a>$headerText</h3>\n";          $retVal .= "<h3><a name=\"$key\"></a>$headerText</h3>\n";
547          # Get the entity names.          # Get the entity names.
548          my $fromEntity = $relationshipStructure->{from};          my $fromEntity = $relationshipStructure->{from};
549          my $toEntity = $relationshipStructure->{to};          my $toEntity = $relationshipStructure->{to};
# Line 480  Line 553 
553          # since both sentences will say the same thing.          # since both sentences will say the same thing.
554          my $arity = $relationshipStructure->{arity};          my $arity = $relationshipStructure->{arity};
555          if ($arity eq "11") {          if ($arity eq "11") {
556              print HTMLOUT "<p>Each <b>$fromEntity</b> relates to at most one <b>$toEntity</b>.\n";              $retVal .= "<p>Each <b>$fromEntity</b> relates to at most one <b>$toEntity</b>.\n";
557          } else {          } else {
558              print HTMLOUT "<p>Each <b>$fromEntity</b> relates to multiple <b>$toEntity</b>s.\n";              $retVal .= "<p>Each <b>$fromEntity</b> relates to multiple <b>$toEntity</b>s.\n";
559              if ($arity eq "MM" && $fromEntity ne $toEntity) {              if ($arity eq "MM" && $fromEntity ne $toEntity) {
560                  print HTMLOUT "Each <b>$toEntity</b> relates to multiple <b>$fromEntity</b>s.\n";                  $retVal .= "Each <b>$toEntity</b> relates to multiple <b>$fromEntity</b>s.\n";
561              }              }
562          }          }
563          print HTMLOUT "</p>\n";          $retVal .= "</p>\n";
564          # If there are notes on this relationship, display them.          # If there are notes on this relationship, display them.
565          if (my $notes = $relationshipStructure->{Notes}) {          if (my $notes = $relationshipStructure->{Notes}) {
566              print HTMLOUT "<p>" . _HTMLNote($notes->{content}) . "</p>\n";              $retVal .= "<p>" . _HTMLNote($notes->{content}) . "</p>\n";
567          }          }
568          # Generate the relationship's relation table.          # Generate the relationship's relation table.
569          my $htmlString = _ShowRelationTable($key, $relationshipStructure->{Relations}->{$key});          my $htmlString = _ShowRelationTable($key, $relationshipStructure->{Relations}->{$key});
570          print HTMLOUT $htmlString;          $retVal .= $htmlString;
571      }      }
572      Trace("Building MetaData join table.") if T(4);      Trace("Building MetaData join table.") if T(4);
573      # Denote we're starting the join table.      # Denote we're starting the join table.
574      print HTMLOUT "<a name=\"JoinTable\"></a><h3>Join Table</h3>\n";      $retVal .= "<a name=\"JoinTable\"></a><h3>Join Table</h3>\n";
575      # Create a table header.      # Create a table header.
576      print HTMLOUT _OpenTable("Join Table", "Source", "Target", "Join Condition");      $retVal .= _OpenTable("Join Table", "Source", "Target", "Join Condition");
577      # Loop through the joins.      # Loop through the joins.
578      my $joinTable = $metadata->{Joins};      my $joinTable = $metadata->{Joins};
579      my @joinKeys = keys %{$joinTable};      my @joinKeys = keys %{$joinTable};
# Line 508  Line 581 
581          # Separate out the source, the target, and the join clause.          # Separate out the source, the target, and the join clause.
582          $joinKey =~ m!^([^/]+)/(.+)$!;          $joinKey =~ m!^([^/]+)/(.+)$!;
583          my ($sourceRelation, $targetRelation) = ($1, $2);          my ($sourceRelation, $targetRelation) = ($1, $2);
584          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(4);          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(Joins => 4);
585          my $source = $self->ComputeObjectSentence($sourceRelation);          my $source = $self->ComputeObjectSentence($sourceRelation);
586          my $target = $self->ComputeObjectSentence($targetRelation);          my $target = $self->ComputeObjectSentence($targetRelation);
587          my $clause = $joinTable->{$joinKey};          my $clause = $joinTable->{$joinKey};
588          # Display them in a table row.          # Display them in a table row.
589          print HTMLOUT "<tr><td>$source</td><td>$target</td><td>$clause</td></tr>\n";          $retVal .= "<tr><td>$source</td><td>$target</td><td>$clause</td></tr>\n";
590      }      }
591      # Close the table.      # Close the table.
592      print HTMLOUT _CloseTable();      $retVal .= _CloseTable();
593      # Close the document.      Trace("Built MetaData HTML.") if T(3);
594      print HTMLOUT "</body>\n</html>\n";      # Return the HTML.
595      # Close the file.      return $retVal;
     close HTMLOUT;  
     Trace("Built MetaData web page.") if T(3);  
596  }  }
597    
598  =head3 DumpMetaData  =head3 DumpMetaData
# Line 626  Line 697 
697      Trace("Creating table $relationName: $fieldThing") if T(2);      Trace("Creating table $relationName: $fieldThing") if T(2);
698      $dbh->create_table(tbl => $relationName, flds => $fieldThing, estimates => $estimation);      $dbh->create_table(tbl => $relationName, flds => $fieldThing, estimates => $estimation);
699      Trace("Relation $relationName created in database.") if T(2);      Trace("Relation $relationName created in database.") if T(2);
700      # If we want to build the indexes, we do it here.      # If we want to build the indexes, we do it here. Note that the full-text search
701        # index will not be built until the table has been loaded.
702      if ($indexFlag) {      if ($indexFlag) {
703          $self->CreateIndex($relationName);          $self->CreateIndex($relationName);
704      }      }
705  }  }
706    
707    =head3 VerifyFields
708    
709    C<< my $count = $erdb->VerifyFields($relName, \@fieldList); >>
710    
711    Run through the list of proposed field values, insuring that all the character fields are
712    below the maximum length. If any fields are too long, they will be truncated in place.
713    
714    =over 4
715    
716    =item relName
717    
718    Name of the relation for which the specified fields are destined.
719    
720    =item fieldList
721    
722    Reference to a list, in order, of the fields to be put into the relation.
723    
724    =item RETURN
725    
726    Returns the number of fields truncated.
727    
728    =back
729    
730    =cut
731    
732    sub VerifyFields {
733        # Get the parameters.
734        my ($self, $relName, $fieldList) = @_;
735        # Initialize the return value.
736        my $retVal = 0;
737        # Get the relation definition.
738        my $relData = $self->_FindRelation($relName);
739        # Get the list of field descriptors.
740        my $fieldTypes = $relData->{Fields};
741        my $fieldCount = scalar @{$fieldTypes};
742        # Loop through the two lists.
743        for (my $i = 0; $i < $fieldCount; $i++) {
744            # Get the type of the current field.
745            my $fieldType = $fieldTypes->[$i]->{type};
746            # If it's a character field, verify the length.
747            if ($fieldType =~ /string/) {
748                my $maxLen = $TypeTable{$fieldType}->{maxLen};
749                my $oldString = $fieldList->[$i];
750                if (length($oldString) > $maxLen) {
751                    # Here it's too big, so we truncate it.
752                    Trace("Truncating field $i in relation $relName to $maxLen characters from \"$oldString\".") if T(1);
753                    $fieldList->[$i] = substr $oldString, 0, $maxLen;
754                    $retVal++;
755                }
756            }
757        }
758        # Return the truncation count.
759        return $retVal;
760    }
761    
762    =head3 DigestFields
763    
764    C<< $erdb->DigestFields($relName, $fieldList); >>
765    
766    Digest the strings in the field list that correspond to data type C<hash-string> in the
767    specified relation.
768    
769    =over 4
770    
771    =item relName
772    
773    Name of the relation to which the fields belong.
774    
775    =item fieldList
776    
777    List of field contents to be loaded into the relation.
778    
779    =back
780    
781    =cut
782    #: Return Type ;
783    sub DigestFields {
784        # Get the parameters.
785        my ($self, $relName, $fieldList) = @_;
786        # Get the relation definition.
787        my $relData = $self->_FindRelation($relName);
788        # Get the list of field descriptors.
789        my $fieldTypes = $relData->{Fields};
790        my $fieldCount = scalar @{$fieldTypes};
791        # Loop through the two lists.
792        for (my $i = 0; $i < $fieldCount; $i++) {
793            # Get the type of the current field.
794            my $fieldType = $fieldTypes->[$i]->{type};
795            # If it's a hash string, digest it in place.
796            if ($fieldType eq 'hash-string') {
797                $fieldList->[$i] = $self->DigestKey($fieldList->[$i]);
798            }
799        }
800    }
801    
802    =head3 DigestKey
803    
804    C<< my $digested = $erdb->DigestKey($keyValue); >>
805    
806    Return the digested value of a symbolic key. The digested value can then be plugged into a
807    key-based search into a table with key-type hash-string.
808    
809    Currently the digesting process is independent of the database structure, but that may not
810    always be the case, so this is an instance method instead of a static method.
811    
812    =over 4
813    
814    =item keyValue
815    
816    Key value to digest.
817    
818    =item RETURN
819    
820    Digested value of the key.
821    
822    =back
823    
824    =cut
825    
826    sub DigestKey {
827        # Get the parameters.
828        my ($self, $keyValue) = @_;
829        # Compute the digest.
830        my $retVal = md5_base64($keyValue);
831        # Return the result.
832        return $retVal;
833    }
834    
835  =head3 CreateIndex  =head3 CreateIndex
836    
837  C<< $erdb->CreateIndex($relationName); >>  C<< $erdb->CreateIndex($relationName); >>
# Line 658  Line 858 
858          my @fieldList = _FixNames(@{$indexData->{IndexFields}});          my @fieldList = _FixNames(@{$indexData->{IndexFields}});
859          my $flds = join(', ', @fieldList);          my $flds = join(', ', @fieldList);
860          # Get the index's uniqueness flag.          # Get the index's uniqueness flag.
861          my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');          my $unique = (exists $indexData->{Unique} ? 'unique' : undef);
862          # Create the index.          # Create the index.
863          $dbh->create_index(idx => $indexName, tbl => $relationName, flds => $flds, unique => $unique);          my $rv = $dbh->create_index(idx => $indexName, tbl => $relationName,
864                                        flds => $flds, kind => $unique);
865            if ($rv) {
866          Trace("Index created: $indexName for $relationName ($flds)") if T(1);          Trace("Index created: $indexName for $relationName ($flds)") if T(1);
867            } else {
868                Confess("Error creating index $indexName for $relationName using ($flds): " . $dbh->error_message());
869            }
870      }      }
871  }  }
872    
# Line 788  Line 993 
993    
994  =head3 Get  =head3 Get
995    
996  C<< my $query = $erdb->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my $query = $erdb->Get(\@objectNames, $filterClause, \@params); >>
997    
998  This method returns a query object for entities of a specified type using a specified filter.  This method returns a query object for entities of a specified type using a specified filter.
999  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each
# Line 796  Line 1001 
1001  following call requests all B<Genome> objects for the genus specified in the variable  following call requests all B<Genome> objects for the genus specified in the variable
1002  $genus.  $genus.
1003    
1004  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", [$genus]); >>
1005    
1006  The WHERE clause contains a single question mark, so there is a single additional  The WHERE clause contains a single question mark, so there is a single additional
1007  parameter representing the parameter value. It would also be possible to code  parameter representing the parameter value. It would also be possible to code
# Line 813  Line 1018 
1018  It is possible to specify multiple entity and relationship names in order to retrieve more than  It is possible to specify multiple entity and relationship names in order to retrieve more than
1019  one object's data at the same time, which allows highly complex joined queries. For example,  one object's data at the same time, which allows highly complex joined queries. For example,
1020    
1021  C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", [$genus]); >>
1022    
1023  If multiple names are specified, then the query processor will automatically determine a  If multiple names are specified, then the query processor will automatically determine a
1024  join path between the entities and relationships. The algorithm used is very simplistic.  join path between the entities and relationships. The algorithm used is very simplistic.
1025  In particular, you can't specify any entity or relationship more than once, and if a  In particular, if a relationship is recursive, the path is determined by the order in which
1026  relationship is recursive, the path is determined by the order in which the entity  the entity and the relationship appear. For example, consider a recursive relationship
1027  and the relationship appear. For example, consider a recursive relationship B<IsParentOf>  B<IsParentOf> which relates B<People> objects to other B<People> objects. If the join path is
 which relates B<People> objects to other B<People> objects. If the join path is  
1028  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,
1029  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.
1030    
1031    If an entity or relationship is mentioned twice, the name for the second occurrence will
1032    be suffixed with C<2>, the third occurrence will be suffixed with C<3>, and so forth. So,
1033    for example, if we have C<['Feature', 'HasContig', 'Contig', 'HasContig']>, then the
1034    B<to-link> field of the first B<HasContig> is specified as C<HasContig(to-link)>, while
1035    the B<to-link> field of the second B<HasContig> is specified as C<HasContig2(to-link)>.
1036    
1037  =over 4  =over 4
1038    
1039  =item objectNames  =item objectNames
# Line 846  Line 1056 
1056    
1057  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>
1058    
1059    Note that the case is important. Only an uppercase "ORDER BY" with a single space will
1060    be processed. The idea is to make it less likely to find the verb by accident.
1061    
1062  The rules for field references in a sort order are the same as those for field references in the  The rules for field references in a sort order are the same as those for field references in the
1063  filter clause in general; however, odd things may happen if a sort field is from a secondary  filter clause in general; however, odd things may happen if a sort field is from a secondary
1064  relation.  relation.
1065    
1066  =item param1, param2, ..., paramN  Finally, you can limit the number of rows returned by adding a LIMIT clause. The LIMIT must
1067    be the last thing in the filter clause, and it contains only the word "LIMIT" followed by
1068    a positive number. So, for example
1069    
1070    C<< "Genome(genus) = ? ORDER BY Genome(species) LIMIT 10" >>
1071    
1072    will only return the first ten genomes for the specified genus. The ORDER BY clause is not
1073    required. For example, to just get the first 10 genomes in the B<Genome> table, you could
1074    use
1075    
1076    C<< "LIMIT 10" >>
1077    
1078  Parameter values to be substituted into the filter clause.  =item params
1079    
1080    Reference to a list of parameter values to be substituted into the filter clause.
1081    
1082  =item RETURN  =item RETURN
1083    
# Line 864  Line 1089 
1089    
1090  sub Get {  sub Get {
1091      # Get the parameters.      # Get the parameters.
1092      my ($self, $objectNames, $filterClause, @params) = @_;      my ($self, $objectNames, $filterClause, $params) = @_;
1093      # Construct the SELECT statement. The general pattern is      # Process the SQL stuff.
1094      #      my ($suffix, $mappedNameListRef, $mappedNameHashRef) =
1095      # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN          $self->_SetupSQL($objectNames, $filterClause);
1096      #      # Create the query.
1097      my $dbh = $self->{_dbh};      my $command = "SELECT DISTINCT " . join(".*, ", @{$mappedNameListRef}) .
1098      my $command = "SELECT DISTINCT " . join('.*, ', @{$objectNames}) . ".* FROM " .          ".* $suffix";
1099                  join(', ', @{$objectNames});      my $sth = $self->_GetStatementHandle($command, $params);
1100      # Check for a filter clause.      # Now we create the relation map, which enables DBQuery to determine the order, name
1101      if ($filterClause) {      # and mapped name for each object in the query.
1102          # Here we have one, so we convert its field names and add it to the query. First,      my @relationMap = ();
1103          # We create a copy of the filter string we can work with.      for my $mappedName (@{$mappedNameListRef}) {
1104          my $filterString = $filterClause;          push @relationMap, [$mappedName, $mappedNameHashRef->{$mappedName}];
         # Next, we sort the object names by length. This helps protect us from finding  
         # object names inside other object names when we're doing our search and replace.  
         my @sortedNames = sort { length($b) - length($a) } @{$objectNames};  
         # We will also keep a list of conditions to add to the WHERE clause in order to link  
         # entities and relationships as well as primary relations to secondary ones.  
         my @joinWhere = ();  
         # The final preparatory step is to create a hash table of relation names. The  
         # table begins with the relation names already in the SELECT command.  
         my %fromNames = ();  
         for my $objectName (@sortedNames) {  
             $fromNames{$objectName} = 1;  
         }  
         # We are ready to begin. We loop through the object names, replacing each  
         # object name's field references by the corresponding SQL field reference.  
         # Along the way, if we find a secondary relation, we will need to add it  
         # to the FROM clause.  
         for my $objectName (@sortedNames) {  
             # Get the length of the object name plus 2. This is the value we add to the  
             # size of the field name to determine the size of the field reference as a  
             # whole.  
             my $nameLength = 2 + length $objectName;  
             # Get the object's field list.  
             my $fieldList = $self->_GetFieldTable($objectName);  
             # Find the field references for this object.  
             while ($filterString =~ m/$objectName\(([^)]*)\)/g) {  
                 # At this point, $1 contains the field name, and the current position  
                 # is set immediately after the final parenthesis. We pull out the name of  
                 # the field and the position and length of the field reference as a whole.  
                 my $fieldName = $1;  
                 my $len = $nameLength + length $fieldName;  
                 my $pos = pos($filterString) - $len;  
                 # Insure the field exists.  
                 if (!exists $fieldList->{$fieldName}) {  
                     Confess("Field $fieldName not found for object $objectName.");  
                 } else {  
                     # Get the field's relation.  
                     my $relationName = $fieldList->{$fieldName}->{relation};  
                     # Insure the relation is in the FROM clause.  
                     if (!exists $fromNames{$relationName}) {  
                         # Add the relation to the FROM clause.  
                         $command .= ", $relationName";  
                         # Create its join sub-clause.  
                         push @joinWhere, "$objectName.id = $relationName.id";  
                         # Denote we have it available for future fields.  
                         $fromNames{$relationName} = 1;  
                     }  
                     # Form an SQL field reference from the relation name and the field name.  
                     my $sqlReference = "$relationName." . _FixName($fieldName);  
                     # Put it into the filter string in place of the old value.  
                     substr($filterString, $pos, $len) = $sqlReference;  
                     # Reposition the search.  
                     pos $filterString = $pos + length $sqlReference;  
                 }  
             }  
         }  
         # The next step is to join the objects together. We only need to do this if there  
         # is more than one object in the object list. We start with the first object and  
         # run through the objects after it. Note also that we make a safety copy of the  
         # list before running through it.  
         my @objectList = @{$objectNames};  
         my $lastObject = shift @objectList;  
         # Get the join table.  
         my $joinTable = $self->{_metaData}->{Joins};  
         # Loop through the object list.  
         for my $thisObject (@objectList) {  
             # Look for a join.  
             my $joinKey = "$lastObject/$thisObject";  
             if (!exists $joinTable->{$joinKey}) {  
                 # Here there's no join, so we throw an error.  
                 Confess("No join exists to connect from $lastObject to $thisObject.");  
             } else {  
                 # Get the join clause and add it to the WHERE list.  
                 push @joinWhere, $joinTable->{$joinKey};  
                 # Save this object as the last object for the next iteration.  
                 $lastObject = $thisObject;  
             }  
         }  
         # Now we need to handle the whole ORDER BY thing. We'll put the order by clause  
         # in the following variable.  
         my $orderClause = "";  
         # Locate the ORDER BY verb (if any).  
         if ($filterString =~ m/^(.*)ORDER BY/g) {  
             # Here we have an ORDER BY verb. Split it off of the filter string.  
             my $pos = pos $filterString;  
             $orderClause = substr($filterString, $pos);  
             $filterString = $1;  
         }  
         # Add the filter and the join clauses (if any) to the SELECT command.  
         if ($filterString) {  
             push @joinWhere, "($filterString)";  
         }  
         if (@joinWhere) {  
             $command .= " WHERE " . join(' AND ', @joinWhere);  
         }  
         # Add the sort clause (if any) to the SELECT command.  
         if ($orderClause) {  
             $command .= " ORDER BY $orderClause";  
         }  
1105      }      }
     Trace("SQL query: $command") if T(3);  
     Trace("PARMS: '" . (join "', '", @params) . "'") if (T(4) && (@params > 0));  
     my $sth = $dbh->prepare_command($command);  
     # Execute it with the parameters bound in.  
     $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());  
1106      # Return the statement object.      # Return the statement object.
1107      my $retVal = DBQuery::_new($self, $sth, @{$objectNames});      my $retVal = DBQuery::_new($self, $sth, \@relationMap);
1108      return $retVal;      return $retVal;
1109  }  }
1110    
1111  =head3 GetList  =head3 Search
   
 C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  
1112    
1113  Return a list of object descriptors for the specified objects as determined by the  C<< my $query = $erdb->Search($searchExpression, $idx, \@objectNames, $filterClause, \@params); >>
 specified filter clause.  
1114    
1115  This method is essentially the same as L</Get> except it returns a list of objects rather  Perform a full text search with filtering. The search will be against a specified object
1116  than a query object that can be used to get the results one record at a time.  in the object name list. That object will get an extra field containing the search
1117    relevance. Note that except for the search expression, the parameters of this method are
1118    the same as those for L</Get> and follow the same rules.
1119    
1120  =over 4  =over 4
1121    
1122    =item searchExpression
1123    
1124    Boolean search expression for the text fields of the target object.
1125    
1126    =item idx
1127    
1128    Index in the I<$objectNames> list of the table to be searched in full-text mode.
1129    
1130  =item objectNames  =item objectNames
1131    
1132  List containing the names of the entity and relationship objects to be retrieved.  List containing the names of the entity and relationship objects to be retrieved.
# Line 1012  Line 1141 
1141  or secondary entity relations; however, all of the entities and relationships involved must  or secondary entity relations; however, all of the entities and relationships involved must
1142  be included in the list of object names.  be included in the list of object names.
1143    
1144  The filter clause can also specify a sort order. To do this, simply follow the filter string  =item params
 with an ORDER BY clause. For example, the following filter string gets all genomes for a  
 particular genus and sorts them by species name.  
   
 C<< "Genome(genus) = ? ORDER BY Genome(species)" >>  
   
 The rules for field references in a sort order are the same as those for field references in the  
 filter clause in general; however, odd things may happen if a sort field is from a secondary  
 relation.  
   
 =item param1, param2, ..., paramN  
1145    
1146  Parameter values to be substituted into the filter clause.  Reference to a list of parameter values to be substituted into the filter clause.
1147    
1148  =item RETURN  =item RETURN
1149    
1150  Returns a list of B<DBObject>s that satisfy the query conditions.  Returns a query object for the specified search.
1151    
1152  =back  =back
1153    
1154  =cut  =cut
1155  #: Return Type @%  
1156  sub GetList {  sub Search {
1157      # Get the parameters.      # Get the parameters.
1158      my ($self, $objectNames, $filterClause, @params) = @_;      my ($self, $searchExpression, $idx, $objectNames, $filterClause, $params) = @_;
1159      # Declare the return variable.      # Declare the return variable.
1160      my @retVal = ();      my $retVal;
1161      # Perform the query.      # Create a safety copy of the parameter list.
1162      my $query = $self->Get($objectNames, $filterClause, @params);      my @myParams = @{$params};
1163      # Loop through the results.      # Get the first object's structure so we have access to the searchable fields.
1164      while (my $object = $query->Fetch) {      my $object1Name = $objectNames->[$idx];
1165          push @retVal, $object;      my $object1Structure = $self->_GetStructure($object1Name);
1166        # Get the field list.
1167        if (! exists $object1Structure->{searchFields}) {
1168            Confess("No searchable index for $object1Name.");
1169        } else {
1170            # Get the field list.
1171            my @fields = @{$object1Structure->{searchFields}};
1172            # We need two match expressions, one for the filter clause and one in the
1173            # query itself. Both will use a parameter mark, so we need to push the
1174            # search expression onto the front of the parameter list twice.
1175            unshift @myParams, $searchExpression, $searchExpression;
1176            # Build the match expression.
1177            my @matchFilterFields = map { "$object1Name." . _FixName($_) } @fields;
1178            my $matchClause = "MATCH (" . join(", ", @matchFilterFields) . ") AGAINST (? IN BOOLEAN MODE)";
1179            # Process the SQL stuff.
1180            my ($suffix, $mappedNameListRef, $mappedNameHashRef) =
1181                $self->_SetupSQL($objectNames, $filterClause, $matchClause);
1182            # Create the query. Note that the match clause is inserted at the front of
1183            # the select fields.
1184            my $command = "SELECT DISTINCT $matchClause, " . join(".*, ", @{$mappedNameListRef}) .
1185                ".* $suffix";
1186            my $sth = $self->_GetStatementHandle($command, \@myParams);
1187            # Now we create the relation map, which enables DBQuery to determine the order, name
1188            # and mapped name for each object in the query.
1189            my @relationMap = _RelationMap($mappedNameHashRef, $mappedNameListRef);
1190            # Return the statement object.
1191            $retVal = DBQuery::_new($self, $sth, \@relationMap, $object1Name);
1192      }      }
1193      # Return the result.      return $retVal;
     return @retVal;  
1194  }  }
1195    
1196  =head3 ComputeObjectSentence  =head3 GetFlat
1197    
1198  C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >>  C<< my @list = $erdb->GetFlat(\@objectNames, $filterClause, \@parameterList, $field); >>
1199    
1200  Check an object name, and if it is a relationship convert it to a relationship sentence.  This is a variation of L</GetAll> that asks for only a single field per record and
1201    returns a single flattened list.
1202    
1203  =over 4  =over 4
1204    
1205  =item objectName  =item objectNames
1206    
1207  Name of the entity or relationship.  List containing the names of the entity and relationship objects to be retrieved.
1208    
1209    =item filterClause
1210    
1211    WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can
1212    be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form
1213    B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the
1214    parameter list as additional parameters. The fields in a filter clause can come from primary
1215    entity relations, relationship relations, or secondary entity relations; however, all of the
1216    entities and relationships involved must be included in the list of object names.
1217    
1218    =item parameterList
1219    
1220    List of the parameters to be substituted in for the parameters marks in the filter clause.
1221    
1222    =item field
1223    
1224    Name of the field to be used to get the elements of the list returned.
1225    
1226  =item RETURN  =item RETURN
1227    
1228  Returns a string containing the entity name or a relationship sentence.  Returns a list of values.
1229    
1230  =back  =back
1231    
1232  =cut  =cut
1233    #: Return Type @;
1234  sub ComputeObjectSentence {  sub GetFlat {
1235      # Get the parameters.      # Get the parameters.
1236      my ($self, $objectName) = @_;      my ($self, $objectNames, $filterClause, $parameterList, $field) = @_;
1237      # Set the default return value.      # Construct the query.
1238      my $retVal = $objectName;      my $query = $self->Get($objectNames, $filterClause, $parameterList);
1239      # Look for the object as a relationship.      # Create the result list.
1240      my $relTable = $self->{_metaData}->{Relationships};      my @retVal = ();
1241        # Loop through the records, adding the field values found to the result list.
1242        while (my $row = $query->Fetch()) {
1243            push @retVal, $row->Value($field);
1244        }
1245        # Return the list created.
1246        return @retVal;
1247    }
1248    
1249    =head3 Delete
1250    
1251    C<< my $stats = $erdb->Delete($entityName, $objectID); >>
1252    
1253    Delete an entity instance from the database. The instance is deleted along with all entity and
1254    relationship instances dependent on it. The idea of dependence here is recursive. An object is
1255    always dependent on itself. An object is dependent if it is a 1-to-many or many-to-many
1256    relationship connected to a dependent entity or the "to" entity connected to a 1-to-many
1257    dependent relationship.
1258    
1259    =over 4
1260    
1261    =item entityName
1262    
1263    Name of the entity type for the instance being deleted.
1264    
1265    =item objectID
1266    
1267    ID of the entity instance to be deleted. If the ID contains a wild card character (C<%>),
1268    then it is presumed to by a LIKE pattern.
1269    
1270    =item testFlag
1271    
1272    If TRUE, the delete statements will be traced without being executed.
1273    
1274    =item RETURN
1275    
1276    Returns a statistics object indicating how many records of each particular table were
1277    deleted.
1278    
1279    =back
1280    
1281    =cut
1282    #: Return Type $%;
1283    sub Delete {
1284        # Get the parameters.
1285        my ($self, $entityName, $objectID, $testFlag) = @_;
1286        # Declare the return variable.
1287        my $retVal = Stats->new();
1288        # Get the DBKernel object.
1289        my $db = $self->{_dbh};
1290        # We're going to generate all the paths branching out from the starting entity. One of
1291        # the things we have to be careful about is preventing loops. We'll use a hash to
1292        # determine if we've hit a loop.
1293        my %alreadyFound = ();
1294        # These next lists will serve as our result stack. We start by pushing object lists onto
1295        # the stack, and then popping them off to do the deletes. This means the deletes will
1296        # start with the longer paths before getting to the shorter ones. That, in turn, makes
1297        # sure we don't delete records that might be needed to forge relationships back to the
1298        # original item. We have two lists-- one for TO-relationships, and one for
1299        # FROM-relationships and entities.
1300        my @fromPathList = ();
1301        my @toPathList = ();
1302        # This final hash is used to remember what work still needs to be done. We push paths
1303        # onto the list, then pop them off to extend the paths. We prime it with the starting
1304        # point. Note that we will work hard to insure that the last item on a path in the
1305        # TODO list is always an entity.
1306        my @todoList = ([$entityName]);
1307        while (@todoList) {
1308            # Get the current path.
1309            my $current = pop @todoList;
1310            # Copy it into a list.
1311            my @stackedPath = @{$current};
1312            # Pull off the last item on the path. It will always be an entity.
1313            my $entityName = pop @stackedPath;
1314            # Add it to the alreadyFound list.
1315            $alreadyFound{$entityName} = 1;
1316            # Get the entity data.
1317            my $entityData = $self->_GetStructure($entityName);
1318            # The first task is to loop through the entity's relation. A DELETE command will
1319            # be needed for each of them.
1320            my $relations = $entityData->{Relations};
1321            for my $relation (keys %{$relations}) {
1322                my @augmentedList = (@stackedPath, $relation);
1323                push @fromPathList, \@augmentedList;
1324            }
1325            # Now we need to look for relationships connected to this entity.
1326            my $relationshipList = $self->{_metaData}->{Relationships};
1327            for my $relationshipName (keys %{$relationshipList}) {
1328                my $relationship = $relationshipList->{$relationshipName};
1329                # Check the FROM field. We're only interested if it's us.
1330                if ($relationship->{from} eq $entityName) {
1331                    # Add the path to this relationship.
1332                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1333                    push @fromPathList, \@augmentedList;
1334                    # Check the arity. If it's MM we're done. If it's 1M
1335                    # and the target hasn't been seen yet, we want to
1336                    # stack the entity for future processing.
1337                    if ($relationship->{arity} eq '1M') {
1338                        my $toEntity = $relationship->{to};
1339                        if (! exists $alreadyFound{$toEntity}) {
1340                            # Here we have a new entity that's dependent on
1341                            # the current entity, so we need to stack it.
1342                            my @stackList = (@augmentedList, $toEntity);
1343                            push @fromPathList, \@stackList;
1344                        } else {
1345                            Trace("$toEntity ignored because it occurred previously.") if T(4);
1346                        }
1347                    }
1348                }
1349                # Now check the TO field. In this case only the relationship needs
1350                # deletion.
1351                if ($relationship->{to} eq $entityName) {
1352                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1353                    push @toPathList, \@augmentedList;
1354                }
1355            }
1356        }
1357        # Create the first qualifier for the WHERE clause. This selects the
1358        # keys of the primary entity records to be deleted. When we're deleting
1359        # from a dependent table, we construct a join page from the first qualifier
1360        # to the table containing the dependent records to delete.
1361        my $qualifier = ($objectID =~ /%/ ? "LIKE ?" : "= ?");
1362        # We need to make two passes. The first is through the to-list, and
1363        # the second through the from-list. The from-list is second because
1364        # the to-list may need to pass through some of the entities the
1365        # from-list would delete.
1366        my %stackList = ( from_link => \@fromPathList, to_link => \@toPathList );
1367        # Now it's time to do the deletes. We do it in two passes.
1368        for my $keyName ('to_link', 'from_link') {
1369            # Get the list for this key.
1370            my @pathList = @{$stackList{$keyName}};
1371            Trace(scalar(@pathList) . " entries in path list for $keyName.") if T(3);
1372            # Loop through this list.
1373            while (my $path = pop @pathList) {
1374                # Get the table whose rows are to be deleted.
1375                my @pathTables = @{$path};
1376                # Start the DELETE statement. We need to call DBKernel because the
1377                # syntax of a DELETE-USING varies among DBMSs.
1378                my $target = $pathTables[$#pathTables];
1379                my $stmt = $db->SetUsing(@pathTables);
1380                # Now start the WHERE. The first thing is the ID field from the starting table. That
1381                # starting table will either be the entity relation or one of the entity's
1382                # sub-relations.
1383                $stmt .= " WHERE $pathTables[0].id $qualifier";
1384                # Now we run through the remaining entities in the path, connecting them up.
1385                for (my $i = 1; $i <= $#pathTables; $i += 2) {
1386                    # Connect the current relationship to the preceding entity.
1387                    my ($entity, $rel) = @pathTables[$i-1,$i];
1388                    # The style of connection depends on the direction of the relationship.
1389                    $stmt .= " AND $entity.id = $rel.$keyName";
1390                    if ($i + 1 <= $#pathTables) {
1391                        # Here there's a next entity, so connect that to the relationship's
1392                        # to-link.
1393                        my $entity2 = $pathTables[$i+1];
1394                        $stmt .= " AND $rel.to_link = $entity2.id";
1395                    }
1396                }
1397                # Now we have our desired DELETE statement.
1398                if ($testFlag) {
1399                    # Here the user wants to trace without executing.
1400                    Trace($stmt) if T(0);
1401                } else {
1402                    # Here we can delete. Note that the SQL method dies with a confessing
1403                    # if an error occurs, so we just go ahead and do it.
1404                    Trace("Executing delete from $target using '$objectID'.") if T(3);
1405                    my $rv = $db->SQL($stmt, 0, $objectID);
1406                    # Accumulate the statistics for this delete. The only rows deleted
1407                    # are from the target table, so we use its name to record the
1408                    # statistic.
1409                    $retVal->Add($target, $rv);
1410                }
1411            }
1412        }
1413        # Return the result.
1414        return $retVal;
1415    }
1416    
1417    =head3 SortNeeded
1418    
1419    C<< my $parms = $erdb->SortNeeded($relationName); >>
1420    
1421    Return the pipe command for the sort that should be applied to the specified
1422    relation when creating the load file.
1423    
1424    For example, if the load file should be sorted ascending by the first
1425    field, this method would return
1426    
1427        sort -k1 -t"\t"
1428    
1429    If the first field is numeric, the method would return
1430    
1431        sort -k1n -t"\t"
1432    
1433    Unfortunately, due to a bug in the C<sort> command, we cannot eliminate duplicate
1434    keys using a sort.
1435    
1436    =over 4
1437    
1438    =item relationName
1439    
1440    Name of the relation to be examined.
1441    
1442    =item
1443    
1444    Returns the sort command to use for sorting the relation, suitable for piping.
1445    
1446    =back
1447    
1448    =cut
1449    #: Return Type $;
1450    sub SortNeeded {
1451        # Get the parameters.
1452        my ($self, $relationName) = @_;
1453        # Declare a descriptor to hold the names of the key fields.
1454        my @keyNames = ();
1455        # Get the relation structure.
1456        my $relationData = $self->_FindRelation($relationName);
1457        # Find out if the relation is a primary entity relation,
1458        # a relationship relation, or a secondary entity relation.
1459        my $entityTable = $self->{_metaData}->{Entities};
1460        my $relationshipTable = $self->{_metaData}->{Relationships};
1461        if (exists $entityTable->{$relationName}) {
1462            # Here we have a primary entity relation.
1463            push @keyNames, "id";
1464        } elsif (exists $relationshipTable->{$relationName}) {
1465            # Here we have a relationship. We sort using the FROM index.
1466            my $relationshipData = $relationshipTable->{$relationName};
1467            my $index = $relationData->{Indexes}->{"idx${relationName}From"};
1468            push @keyNames, @{$index->{IndexFields}};
1469        } else {
1470            # Here we have a secondary entity relation, so we have a sort on the ID field.
1471            push @keyNames, "id";
1472        }
1473        # Now we parse the key names into sort parameters. First, we prime the return
1474        # string.
1475        my $retVal = "sort -t\"\t\" ";
1476        # Get the relation's field list.
1477        my @fields = @{$relationData->{Fields}};
1478        # Loop through the keys.
1479        for my $keyData (@keyNames) {
1480            # Get the key and the ordering.
1481            my ($keyName, $ordering);
1482            if ($keyData =~ /^([^ ]+) DESC/) {
1483                ($keyName, $ordering) = ($1, "descending");
1484            } else {
1485                ($keyName, $ordering) = ($keyData, "ascending");
1486            }
1487            # Find the key's position and type.
1488            my $fieldSpec;
1489            for (my $i = 0; $i <= $#fields && ! $fieldSpec; $i++) {
1490                my $thisField = $fields[$i];
1491                if ($thisField->{name} eq $keyName) {
1492                    # Get the sort modifier for this field type. The modifier
1493                    # decides whether we're using a character, numeric, or
1494                    # floating-point sort.
1495                    my $modifier = $TypeTable{$thisField->{type}}->{sort};
1496                    # If the index is descending for this field, denote we want
1497                    # to reverse the sort order on this field.
1498                    if ($ordering eq 'descending') {
1499                        $modifier .= "r";
1500                    }
1501                    # Store the position and modifier into the field spec, which
1502                    # will stop the inner loop. Note that the field number is
1503                    # 1-based in the sort command, so we have to increment the
1504                    # index.
1505                    $fieldSpec = ($i + 1) . $modifier;
1506                }
1507            }
1508            # Add this field to the sort command.
1509            $retVal .= " -k$fieldSpec";
1510        }
1511        # Return the result.
1512        return $retVal;
1513    }
1514    
1515    =head3 GetList
1516    
1517    C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, \@params); >>
1518    
1519    Return a list of object descriptors for the specified objects as determined by the
1520    specified filter clause.
1521    
1522    This method is essentially the same as L</Get> except it returns a list of objects rather
1523    than a query object that can be used to get the results one record at a time.
1524    
1525    =over 4
1526    
1527    =item objectNames
1528    
1529    List containing the names of the entity and relationship objects to be retrieved.
1530    
1531    =item filterClause
1532    
1533    WHERE clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can
1534    be parameterized with parameter markers (C<?>). Each field used in the WHERE clause must be
1535    specified in the standard form B<I<objectName>(I<fieldName>)>. Any parameters specified
1536    in the filter clause should be added to the parameter list as additional parameters. The
1537    fields in a filter clause can come from primary entity relations, relationship relations,
1538    or secondary entity relations; however, all of the entities and relationships involved must
1539    be included in the list of object names.
1540    
1541    The filter clause can also specify a sort order. To do this, simply follow the filter string
1542    with an ORDER BY clause. For example, the following filter string gets all genomes for a
1543    particular genus and sorts them by species name.
1544    
1545    C<< "Genome(genus) = ? ORDER BY Genome(species)" >>
1546    
1547    The rules for field references in a sort order are the same as those for field references in the
1548    filter clause in general; however, odd things may happen if a sort field is from a secondary
1549    relation.
1550    
1551    =item params
1552    
1553    Reference to a list of parameter values to be substituted into the filter clause.
1554    
1555    =item RETURN
1556    
1557    Returns a list of B<DBObject>s that satisfy the query conditions.
1558    
1559    =back
1560    
1561    =cut
1562    #: Return Type @%
1563    sub GetList {
1564        # Get the parameters.
1565        my ($self, $objectNames, $filterClause, $params) = @_;
1566        # Declare the return variable.
1567        my @retVal = ();
1568        # Perform the query.
1569        my $query = $self->Get($objectNames, $filterClause, $params);
1570        # Loop through the results.
1571        while (my $object = $query->Fetch) {
1572            push @retVal, $object;
1573        }
1574        # Return the result.
1575        return @retVal;
1576    }
1577    
1578    =head3 GetCount
1579    
1580    C<< my $count = $erdb->GetCount(\@objectNames, $filter, \@params); >>
1581    
1582    Return the number of rows found by a specified query. This method would
1583    normally be used to count the records in a single table. For example, in a
1584    genetics database
1585    
1586        my $count = $erdb->GetCount(['Genome'], 'Genome(genus-species) LIKE ?', ['homo %']);
1587    
1588    would return the number of genomes for the genus I<homo>. It is conceivable, however,
1589    to use it to return records based on a join. For example,
1590    
1591        my $count = $erdb->GetCount(['HasFeature', 'Genome'], 'Genome(genus-species) LIKE ?',
1592                                    ['homo %']);
1593    
1594    would return the number of features for genomes in the genus I<homo>. Note that
1595    only the rows from the first table are counted. If the above command were
1596    
1597        my $count = $erdb->GetCount(['Genome', 'Feature'], 'Genome(genus-species) LIKE ?',
1598                                    ['homo %']);
1599    
1600    it would return the number of genomes, not the number of genome/feature pairs.
1601    
1602    =over 4
1603    
1604    =item objectNames
1605    
1606    Reference to a list of the objects (entities and relationships) included in the
1607    query.
1608    
1609    =item filter
1610    
1611    A filter clause for restricting the query. The rules are the same as for the L</Get>
1612    method.
1613    
1614    =item params
1615    
1616    Reference to a list of the parameter values to be substituted for the parameter marks
1617    in the filter.
1618    
1619    =item RETURN
1620    
1621    Returns a count of the number of records in the first table that would satisfy
1622    the query.
1623    
1624    =back
1625    
1626    =cut
1627    
1628    sub GetCount {
1629        # Get the parameters.
1630        my ($self, $objectNames, $filter, $params) = @_;
1631        # Insure the params argument is an array reference if the caller left it off.
1632        if (! defined($params)) {
1633            $params = [];
1634        }
1635        # Declare the return variable.
1636        my $retVal;
1637        # Find out if we're counting an entity or a relationship.
1638        my $countedField;
1639        if ($self->IsEntity($objectNames->[0])) {
1640            $countedField = "id";
1641        } else {
1642            # For a relationship we count the to-link because it's usually more
1643            # numerous. Note we're automatically converting to the SQL form
1644            # of the field name (to_link vs. to-link).
1645            $countedField = "to_link";
1646        }
1647        # Create the SQL command suffix to get the desired records.
1648        my ($suffix, $mappedNameListRef, $mappedNameHashRef) = $self->_SetupSQL($objectNames,
1649                                                                                $filter);
1650        # Prefix it with text telling it we want a record count.
1651        my $firstObject = $mappedNameListRef->[0];
1652        my $command = "SELECT COUNT($firstObject.$countedField) $suffix";
1653        # Prepare and execute the command.
1654        my $sth = $self->_GetStatementHandle($command, $params);
1655        # Get the count value.
1656        ($retVal) = $sth->fetchrow_array();
1657        # Check for a problem.
1658        if (! defined($retVal)) {
1659            if ($sth->err) {
1660                # Here we had an SQL error.
1661                Confess("Error retrieving row count: " . $sth->errstr());
1662            } else {
1663                # Here we have no result.
1664                Confess("No result attempting to retrieve row count.");
1665            }
1666        }
1667        # Return the result.
1668        return $retVal;
1669    }
1670    
1671    =head3 ComputeObjectSentence
1672    
1673    C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >>
1674    
1675    Check an object name, and if it is a relationship convert it to a relationship sentence.
1676    
1677    =over 4
1678    
1679    =item objectName
1680    
1681    Name of the entity or relationship.
1682    
1683    =item RETURN
1684    
1685    Returns a string containing the entity name or a relationship sentence.
1686    
1687    =back
1688    
1689    =cut
1690    
1691    sub ComputeObjectSentence {
1692        # Get the parameters.
1693        my ($self, $objectName) = @_;
1694        # Set the default return value.
1695        my $retVal = $objectName;
1696        # Look for the object as a relationship.
1697        my $relTable = $self->{_metaData}->{Relationships};
1698      if (exists $relTable->{$objectName}) {      if (exists $relTable->{$objectName}) {
1699          # Get the relationship sentence.          # Get the relationship sentence.
1700          $retVal = _ComputeRelationshipSentence($objectName, $relTable->{$objectName});          $retVal = _ComputeRelationshipSentence($objectName, $relTable->{$objectName});
# Line 1126  Line 1745 
1745      }      }
1746  }  }
1747    
1748    =head3 InsertValue
1749    
1750    C<< $erdb->InsertValue($entityID, $fieldName, $value); >>
1751    
1752    This method will insert a new value into the database. The value must be one
1753    associated with a secondary relation, since primary values cannot be inserted:
1754    they occur exactly once. Secondary values, on the other hand, can be missing
1755    or multiply-occurring.
1756    
1757    =over 4
1758    
1759    =item entityID
1760    
1761    ID of the object that is to receive the new value.
1762    
1763    =item fieldName
1764    
1765    Field name for the new value-- this includes the entity name, since
1766    field names are of the format I<objectName>C<(>I<fieldName>C<)>.
1767    
1768    =item value
1769    
1770    New value to be put in the field.
1771    
1772    =back
1773    
1774    =cut
1775    
1776    sub InsertValue {
1777        # Get the parameters.
1778        my ($self, $entityID, $fieldName, $value) = @_;
1779        # Parse the entity name and the real field name.
1780        if ($fieldName =~ /^([^(]+)\(([^)]+)\)/) {
1781            my $entityName = $1;
1782            my $fieldTitle = $2;
1783            # Get its descriptor.
1784            if (!$self->IsEntity($entityName)) {
1785                Confess("$entityName is not a valid entity.");
1786            } else {
1787                my $entityData = $self->{_metaData}->{Entities}->{$entityName};
1788                # Find the relation containing this field.
1789                my $fieldHash = $entityData->{Fields};
1790                if (! exists $fieldHash->{$fieldTitle}) {
1791                    Confess("$fieldTitle not found in $entityName.");
1792                } else {
1793                    my $relation = $fieldHash->{$fieldTitle}->{relation};
1794                    if ($relation eq $entityName) {
1795                        Confess("Cannot do InsertValue on primary field $fieldTitle of $entityName.");
1796                    } else {
1797                        # Now we can create an INSERT statement.
1798                        my $dbh = $self->{_dbh};
1799                        my $fixedName = _FixName($fieldTitle);
1800                        my $statement = "INSERT INTO $relation (id, $fixedName) VALUES(?, ?)";
1801                        # Execute the command.
1802                        $dbh->SQL($statement, 0, $entityID, $value);
1803                    }
1804                }
1805            }
1806        } else {
1807            Confess("$fieldName is not a valid field name.");
1808        }
1809    }
1810    
1811  =head3 InsertObject  =head3 InsertObject
1812    
1813  C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >>  C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >>
# Line 1142  Line 1824 
1824  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and
1825  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.
1826    
1827  C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>  C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence => 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>
1828    
1829  =over 4  =over 4
1830    
# Line 1288  Line 1970 
1970    
1971  =item RETURN  =item RETURN
1972    
1973  Returns a statistical object containing the number of records read and a list of  Returns a statistical object containing a list of the error messages.
 the error messages.  
1974    
1975  =back  =back
1976    
# Line 1343  Line 2024 
2024          my $size = -s $fileName;          my $size = -s $fileName;
2025          Trace("$size bytes loaded into $relationName.") if T(2);          Trace("$size bytes loaded into $relationName.") if T(2);
2026          # If we're rebuilding, we need to create the table indexes.          # If we're rebuilding, we need to create the table indexes.
2027          if ($truncateFlag && ! $dbh->{_preIndex}) {          if ($truncateFlag) {
2028                # Indexes are created here for PostGres. For PostGres, indexes are
2029                # best built at the end. For MySQL, the reverse is true.
2030                if (! $dbh->{_preIndex}) {
2031              eval {              eval {
2032                  $self->CreateIndex($relationName);                  $self->CreateIndex($relationName);
2033              };              };
# Line 1351  Line 2035 
2035                  $retVal->AddMessage($@);                  $retVal->AddMessage($@);
2036              }              }
2037          }          }
2038                # The full-text index (if any) is always built last, even for MySQL.
2039                # First we need to see if this table has a full-text index. Only
2040                # primary relations are allowed that privilege.
2041                if ($self->_IsPrimary($relationName)) {
2042                    # Get the relation's entity/relationship structure.
2043                    my $structure = $self->_GetStructure($relationName);
2044                    # Check for a searchable fields list.
2045                    if (exists $structure->{searchFields}) {
2046                        # Here we know that we need to create a full-text search index.
2047                        # Get an SQL-formatted field name list.
2048                        my $fields = join(", ", $self->_FixNames(@{$structure->{searchFields}}));
2049                        # Create the index.
2050                        $dbh->create_index(tbl => $relationName, idx => "search_idx_$relationName",
2051                                           flds => $fields, kind => 'fulltext');
2052                    }
2053                }
2054            }
2055      }      }
     # Commit the database changes.  
     $dbh->commit_tran;  
2056      # Analyze the table to improve performance.      # Analyze the table to improve performance.
2057        Trace("Analyzing and compacting $relationName.") if T(3);
2058      $dbh->vacuum_it($relationName);      $dbh->vacuum_it($relationName);
2059        Trace("$relationName load completed.") if T(3);
2060      # Return the statistics.      # Return the statistics.
2061      return $retVal;      return $retVal;
2062  }  }
# Line 1414  Line 2115 
2115          # Generate data from the fields.          # Generate data from the fields.
2116          _GenerateFields($this, $fields, $type, $values);          _GenerateFields($this, $fields, $type, $values);
2117      }      }
2118      # Return the hash created.      # Return the hash created.
2119      return $this;      return $this;
2120    }
2121    
2122    =head3 GetEntity
2123    
2124    C<< my $entityObject = $erdb->GetEntity($entityType, $ID); >>
2125    
2126    Return an object describing the entity instance with a specified ID.
2127    
2128    =over 4
2129    
2130    =item entityType
2131    
2132    Entity type name.
2133    
2134    =item ID
2135    
2136    ID of the desired entity.
2137    
2138    =item RETURN
2139    
2140    Returns a B<DBObject> representing the desired entity instance, or an undefined value if no
2141    instance is found with the specified key.
2142    
2143    =back
2144    
2145    =cut
2146    
2147    sub GetEntity {
2148        # Get the parameters.
2149        my ($self, $entityType, $ID) = @_;
2150        # Create a query.
2151        my $query = $self->Get([$entityType], "$entityType(id) = ?", [$ID]);
2152        # Get the first (and only) object.
2153        my $retVal = $query->Fetch();
2154        # Return the result.
2155        return $retVal;
2156    }
2157    
2158    =head3 GetChoices
2159    
2160    C<< my @values = $erdb->GetChoices($entityName, $fieldName); >>
2161    
2162    Return a list of all the values for the specified field that are represented in the
2163    specified entity.
2164    
2165    Note that if the field is not indexed, then this will be a very slow operation.
2166    
2167    =over 4
2168    
2169    =item entityName
2170    
2171    Name of an entity in the database.
2172    
2173    =item fieldName
2174    
2175    Name of a field belonging to the entity. This is a raw field name without
2176    the standard parenthesized notation used in most calls.
2177    
2178    =item RETURN
2179    
2180    Returns a list of the distinct values for the specified field in the database.
2181    
2182    =back
2183    
2184    =cut
2185    
2186    sub GetChoices {
2187        # Get the parameters.
2188        my ($self, $entityName, $fieldName) = @_;
2189        # Declare the return variable.
2190        my @retVal;
2191        # Get the entity data structure.
2192        my $entityData = $self->_GetStructure($entityName);
2193        # Get the field.
2194        my $fieldHash = $entityData->{Fields};
2195        if (! exists $fieldHash->{$fieldName}) {
2196            Confess("$fieldName not found in $entityName.");
2197        } else {
2198            # Get the name of the relation containing the field.
2199            my $relation = $fieldHash->{$fieldName}->{relation};
2200            # Fix up the field name.
2201            my $realName = _FixName($fieldName);
2202            # Get the database handle.
2203            my $dbh = $self->{_dbh};
2204            # Query the database.
2205            my $results = $dbh->SQL("SELECT DISTINCT $realName FROM $relation");
2206            # Clean the results. They are stored as a list of lists, and we just want the one list.
2207            @retVal = sort map { $_->[0] } @{$results};
2208        }
2209        # Return the result.
2210        return @retVal;
2211    }
2212    
2213    =head3 GetEntityValues
2214    
2215    C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >>
2216    
2217    Return a list of values from a specified entity instance. If the entity instance
2218    does not exist, an empty list is returned.
2219    
2220    =over 4
2221    
2222    =item entityType
2223    
2224    Entity type name.
2225    
2226    =item ID
2227    
2228    ID of the desired entity.
2229    
2230    =item fields
2231    
2232    List of field names, each of the form I<objectName>C<(>I<fieldName>C<)>.
2233    
2234    =item RETURN
2235    
2236    Returns a flattened list of the values of the specified fields for the specified entity.
2237    
2238    =back
2239    
2240    =cut
2241    
2242    sub GetEntityValues {
2243        # Get the parameters.
2244        my ($self, $entityType, $ID, $fields) = @_;
2245        # Get the specified entity.
2246        my $entity = $self->GetEntity($entityType, $ID);
2247        # Declare the return list.
2248        my @retVal = ();
2249        # If we found the entity, push the values into the return list.
2250        if ($entity) {
2251            push @retVal, $entity->Values($fields);
2252        }
2253        # Return the result.
2254        return @retVal;
2255    }
2256    
2257    =head3 GetAll
2258    
2259    C<< my @list = $erdb->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>
2260    
2261    Return a list of values taken from the objects returned by a query. The first three
2262    parameters correspond to the parameters of the L</Get> method. The final parameter is
2263    a list of the fields desired from each record found by the query. The field name
2264    syntax is the standard syntax used for fields in the B<ERDB> system--
2265    B<I<objectName>(I<fieldName>)>-- where I<objectName> is the name of the relevant entity
2266    or relationship and I<fieldName> is the name of the field.
2267    
2268    The list returned will be a list of lists. Each element of the list will contain
2269    the values returned for the fields specified in the fourth parameter. If one of the
2270    fields specified returns multiple values, they are flattened in with the rest. For
2271    example, the following call will return a list of the features in a particular
2272    spreadsheet cell, and each feature will be represented by a list containing the
2273    feature ID followed by all of its aliases.
2274    
2275    C<< $query = $erdb->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>
2276    
2277    =over 4
2278    
2279    =item objectNames
2280    
2281    List containing the names of the entity and relationship objects to be retrieved.
2282    
2283    =item filterClause
2284    
2285    WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can
2286    be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form
2287    B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the
2288    parameter list as additional parameters. The fields in a filter clause can come from primary
2289    entity relations, relationship relations, or secondary entity relations; however, all of the
2290    entities and relationships involved must be included in the list of object names.
2291    
2292    =item parameterList
2293    
2294    List of the parameters to be substituted in for the parameters marks in the filter clause.
2295    
2296    =item fields
2297    
2298    List of the fields to be returned in each element of the list returned.
2299    
2300    =item count
2301    
2302    Maximum number of records to return. If omitted or 0, all available records will be returned.
2303    
2304    =item RETURN
2305    
2306    Returns a list of list references. Each element of the return list contains the values for the
2307    fields specified in the B<fields> parameter.
2308    
2309    =back
2310    
2311    =cut
2312    #: Return Type @@;
2313    sub GetAll {
2314        # Get the parameters.
2315        my ($self, $objectNames, $filterClause, $parameterList, $fields, $count) = @_;
2316        # Translate the parameters from a list reference to a list. If the parameter
2317        # list is a scalar we convert it into a singleton list.
2318        my @parmList = ();
2319        if (ref $parameterList eq "ARRAY") {
2320            Trace("GetAll parm list is an array.") if T(4);
2321            @parmList = @{$parameterList};
2322        } else {
2323            Trace("GetAll parm list is a scalar: $parameterList.") if T(4);
2324            push @parmList, $parameterList;
2325        }
2326        # Insure the counter has a value.
2327        if (!defined $count) {
2328            $count = 0;
2329        }
2330        # Add the row limit to the filter clause.
2331        if ($count > 0) {
2332            $filterClause .= " LIMIT $count";
2333        }
2334        # Create the query.
2335        my $query = $self->Get($objectNames, $filterClause, \@parmList);
2336        # Set up a counter of the number of records read.
2337        my $fetched = 0;
2338        # Loop through the records returned, extracting the fields. Note that if the
2339        # counter is non-zero, we stop when the number of records read hits the count.
2340        my @retVal = ();
2341        while (($count == 0 || $fetched < $count) && (my $row = $query->Fetch())) {
2342            my @rowData = $row->Values($fields);
2343            push @retVal, \@rowData;
2344            $fetched++;
2345        }
2346        Trace("$fetched rows returned in GetAll.") if T(SQL => 4);
2347        # Return the resulting list.
2348        return @retVal;
2349    }
2350    
2351    =head3 Exists
2352    
2353    C<< my $found = $sprout->Exists($entityName, $entityID); >>
2354    
2355    Return TRUE if an entity exists, else FALSE.
2356    
2357    =over 4
2358    
2359    =item entityName
2360    
2361    Name of the entity type (e.g. C<Feature>) relevant to the existence check.
2362    
2363    =item entityID
2364    
2365    ID of the entity instance whose existence is to be checked.
2366    
2367    =item RETURN
2368    
2369    Returns TRUE if the entity instance exists, else FALSE.
2370    
2371    =back
2372    
2373    =cut
2374    #: Return Type $;
2375    sub Exists {
2376        # Get the parameters.
2377        my ($self, $entityName, $entityID) = @_;
2378        # Check for the entity instance.
2379        Trace("Checking existence of $entityName with ID=$entityID.") if T(4);
2380        my $testInstance = $self->GetEntity($entityName, $entityID);
2381        # Return an existence indicator.
2382        my $retVal = ($testInstance ? 1 : 0);
2383        return $retVal;
2384    }
2385    
2386    =head3 EstimateRowSize
2387    
2388    C<< my $rowSize = $erdb->EstimateRowSize($relName); >>
2389    
2390    Estimate the row size of the specified relation. The estimated row size is computed by adding
2391    up the average length for each data type.
2392    
2393    =over 4
2394    
2395    =item relName
2396    
2397    Name of the relation whose estimated row size is desired.
2398    
2399    =item RETURN
2400    
2401    Returns an estimate of the row size for the specified relation.
2402    
2403    =back
2404    
2405    =cut
2406    #: Return Type $;
2407    sub EstimateRowSize {
2408        # Get the parameters.
2409        my ($self, $relName) = @_;
2410        # Declare the return variable.
2411        my $retVal = 0;
2412        # Find the relation descriptor.
2413        my $relation = $self->_FindRelation($relName);
2414        # Get the list of fields.
2415        for my $fieldData (@{$relation->{Fields}}) {
2416            # Get the field type and add its length.
2417            my $fieldLen = $TypeTable{$fieldData->{type}}->{avgLen};
2418            $retVal += $fieldLen;
2419        }
2420        # Return the result.
2421        return $retVal;
2422    }
2423    
2424    =head3 GetFieldTable
2425    
2426    C<< my $fieldHash = $self->GetFieldTable($objectnName); >>
2427    
2428    Get the field structure for a specified entity or relationship.
2429    
2430    =over 4
2431    
2432    =item objectName
2433    
2434    Name of the desired entity or relationship.
2435    
2436    =item RETURN
2437    
2438    The table containing the field descriptors for the specified object.
2439    
2440    =back
2441    
2442    =cut
2443    
2444    sub GetFieldTable {
2445        # Get the parameters.
2446        my ($self, $objectName) = @_;
2447        # Get the descriptor from the metadata.
2448        my $objectData = $self->_GetStructure($objectName);
2449        # Return the object's field table.
2450        return $objectData->{Fields};
2451    }
2452    
2453    =head2 Data Mining Methods
2454    
2455    =head3 GetUsefulCrossValues
2456    
2457    C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >>
2458    
2459    Return a list of the useful attributes that would be returned by a B<Cross> call
2460    from an entity of the source entity type through the specified relationship. This
2461    means it will return the fields of the target entity type and the intersection data
2462    fields in the relationship. Only primary table fields are returned. In other words,
2463    the field names returned will be for fields where there is always one and only one
2464    value.
2465    
2466    =over 4
2467    
2468    =item sourceEntity
2469    
2470    Name of the entity from which the relationship crossing will start.
2471    
2472    =item relationship
2473    
2474    Name of the relationship being crossed.
2475    
2476    =item RETURN
2477    
2478    Returns a list of field names in Sprout field format (I<objectName>C<(>I<fieldName>C<)>.
2479    
2480    =back
2481    
2482    =cut
2483    #: Return Type @;
2484    sub GetUsefulCrossValues {
2485        # Get the parameters.
2486        my ($self, $sourceEntity, $relationship) = @_;
2487        # Declare the return variable.
2488        my @retVal = ();
2489        # Determine the target entity for the relationship. This is whichever entity is not
2490        # the source entity. So, if the source entity is the FROM, we'll get the name of
2491        # the TO, and vice versa.
2492        my $relStructure = $self->_GetStructure($relationship);
2493        my $targetEntityType = ($relStructure->{from} eq $sourceEntity ? "to" : "from");
2494        my $targetEntity = $relStructure->{$targetEntityType};
2495        # Get the field table for the entity.
2496        my $entityFields = $self->GetFieldTable($targetEntity);
2497        # The field table is a hash. The hash key is the field name. The hash value is a structure.
2498        # For the entity fields, the key aspect of the target structure is that the {relation} value
2499        # must match the entity name.
2500        my @fieldList = map { "$targetEntity($_)" } grep { $entityFields->{$_}->{relation} eq $targetEntity }
2501                            keys %{$entityFields};
2502        # Push the fields found onto the return variable.
2503        push @retVal, sort @fieldList;
2504        # Get the field table for the relationship.
2505        my $relationshipFields = $self->GetFieldTable($relationship);
2506        # Here we have a different rule. We want all the fields other than "from-link" and "to-link".
2507        # This may end up being an empty set.
2508        my @fieldList2 = map { "$relationship($_)" } grep { $_ ne "from-link" && $_ ne "to-link" }
2509                            keys %{$relationshipFields};
2510        # Push these onto the return list.
2511        push @retVal, sort @fieldList2;
2512        # Return the result.
2513        return @retVal;
2514    }
2515    
2516    =head3 FindColumn
2517    
2518    C<< my $colIndex = ERDB::FindColumn($headerLine, $columnIdentifier); >>
2519    
2520    Return the location a desired column in a data mining header line. The data
2521    mining header line is a tab-separated list of column names. The column
2522    identifier is either the numerical index of a column or the actual column
2523    name.
2524    
2525    =over 4
2526    
2527    =item headerLine
2528    
2529    The header line from a data mining command, which consists of a tab-separated
2530    list of column names.
2531    
2532    =item columnIdentifier
2533    
2534    Either the ordinal number of the desired column (1-based), or the name of the
2535    desired column.
2536    
2537    =item RETURN
2538    
2539    Returns the array index (0-based) of the desired column.
2540    
2541    =back
2542    
2543    =cut
2544    
2545    sub FindColumn {
2546        # Get the parameters.
2547        my ($headerLine, $columnIdentifier) = @_;
2548        # Declare the return variable.
2549        my $retVal;
2550        # Split the header line into column names.
2551        my @headers = ParseColumns($headerLine);
2552        # Determine whether we have a number or a name.
2553        if ($columnIdentifier =~ /^\d+$/) {
2554            # Here we have a number. Subtract 1 and validate the result.
2555            $retVal = $columnIdentifier - 1;
2556            if ($retVal < 0 || $retVal > $#headers) {
2557                Confess("Invalid column identifer \"$columnIdentifier\": value out of range.");
2558            }
2559        } else {
2560            # Here we have a name. We need to find it in the list.
2561            for (my $i = 0; $i <= $#headers && ! defined($retVal); $i++) {
2562                if ($headers[$i] eq $columnIdentifier) {
2563                    $retVal = $i;
2564                }
2565            }
2566            if (! defined($retVal)) {
2567                Confess("Invalid column identifier \"$columnIdentifier\": value not found.");
2568            }
2569        }
2570        # Return the result.
2571        return $retVal;
2572  }  }
2573    
2574  =head3 GetEntity  =head3 ParseColumns
2575    
2576  C<< my $entityObject = $erdb->GetEntity($entityType, $ID); >>  C<< my @columns = ERDB::ParseColumns($line); >>
2577    
2578  Return an object describing the entity instance with a specified ID.  Convert the specified data line to a list of columns.
2579    
2580  =over 4  =over 4
2581    
2582  =item entityType  =item line
   
 Entity type name.  
   
 =item ID  
2583    
2584  ID of the desired entity.  A data mining input, consisting of a tab-separated list of columns terminated by a
2585    new-line.
2586    
2587  =item RETURN  =item RETURN
2588    
2589  Returns a B<DBObject> representing the desired entity instance, or an undefined value if no  Returns a list consisting of the column values.
 instance is found with the specified key.  
2590    
2591  =back  =back
2592    
2593  =cut  =cut
2594    
2595  sub GetEntity {  sub ParseColumns {
2596      # Get the parameters.      # Get the parameters.
2597      my ($self, $entityType, $ID) = @_;      my ($line) = @_;
2598      # Create a query.      # Chop off the line-end.
2599      my $query = $self->Get([$entityType], "$entityType(id) = ?", $ID);      chomp $line;
2600      # Get the first (and only) object.      # Split it into a list.
2601      my $retVal = $query->Fetch();      my @retVal = split(/\t/, $line);
2602      # Return the result.      # Return the result.
2603      return $retVal;      return @retVal;
2604  }  }
2605    
2606  =head3 GetEntityValues  =head2 Internal Utility Methods
   
 C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >>  
2607    
2608  Return a list of values from a specified entity instance.  =head3 _RelationMap
2609    
2610  =over 4  C<< my @relationMap = _RelationMap($mappedNameHashRef, $mappedNameListRef); >>
2611    
2612  =item entityType  Create the relation map for an SQL query. The relation map is used by B<DBObject>
2613    to determine how to interpret the results of the query.
2614    
2615  Entity type name.  =over 4
2616    
2617  =item ID  =item mappedNameHashRef
2618    
2619  ID of the desired entity.  Reference to a hash that maps modified object names to real object names.
2620    
2621  =item fields  =item mappedNameListRef
2622    
2623  List of field names, each of the form I<objectName>C<(>I<fieldName>C<)>.  Reference to a list of modified object names in the order they appear in the
2624    SELECT list.
2625    
2626  =item RETURN  =item RETURN
2627    
2628  Returns a flattened list of the values of the specified fields for the specified entity.  Returns a list of 2-tuples. Each tuple consists of an object name as used in the
2629    query followed by the actual name of that object. This enables the B<DBObject> to
2630    determine the order of the tables in the query and which object name belongs to each
2631    mapped object name. Most of the time these two values are the same; however, if a
2632    relation occurs twice in the query, the relation name in the field list and WHERE
2633    clause will use a mapped name (generally the actual relation name with a numeric
2634    suffix) that does not match the actual relation name.
2635    
2636  =back  =back
2637    
2638  =cut  =cut
2639    
2640  sub GetEntityValues {  sub _RelationMap {
2641      # Get the parameters.      # Get the parameters.
2642      my ($self, $entityType, $ID, $fields) = @_;      my ($mappedNameHashRef, $mappedNameListRef) = @_;
2643      # Get the specified entity.      # Declare the return variable.
     my $entity = $self->GetEntity($entityType, $ID);  
     # Declare the return list.  
2644      my @retVal = ();      my @retVal = ();
2645      # If we found the entity, push the values into the return list.      # Build the map.
2646      if ($entity) {      for my $mappedName (@{$mappedNameListRef}) {
2647          push @retVal, $entity->Values($fields);          push @retVal, [$mappedName, $mappedNameHashRef->{$mappedName}];
2648      }      }
2649      # Return the result.      # Return it.
2650      return @retVal;      return @retVal;
2651  }  }
2652    
 =head3 GetAll  
   
 C<< my @list = $erdb->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>  
2653    
2654  Return a list of values taken from the objects returned by a query. The first three  =head3 _SetupSQL
 parameters correspond to the parameters of the L</Get> method. The final parameter is  
 a list of the fields desired from each record found by the query. The field name  
 syntax is the standard syntax used for fields in the B<ERDB> system--  
 B<I<objectName>(I<fieldName>)>-- where I<objectName> is the name of the relevant entity  
 or relationship and I<fieldName> is the name of the field.  
2655    
2656  The list returned will be a list of lists. Each element of the list will contain  Process a list of object names and a filter clause so that they can be used to
2657  the values returned for the fields specified in the fourth parameter. If one of the  build an SQL statement. This method takes in a reference to a list of object names
2658  fields specified returns multiple values, they are flattened in with the rest. For  and a filter clause. It will return a corrected filter clause, a list of mapped
2659  example, the following call will return a list of the features in a particular  names and the mapped name hash.
 spreadsheet cell, and each feature will be represented by a list containing the  
 feature ID followed by all of its aliases.  
2660    
2661  C<< $query = $erdb->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>  This is an instance method.
2662    
2663  =over 4  =over 4
2664    
2665  =item objectNames  =item objectNames
2666    
2667  List containing the names of the entity and relationship objects to be retrieved.  Reference to a list of the object names to be included in the query.
2668    
2669  =item filterClause  =item filterClause
2670    
2671  WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  A string containing the WHERE clause for the query (without the C<WHERE>) and also
2672  be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  optionally the C<ORDER BY> and C<LIMIT> clauses.
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
   
 List of the parameters to be substituted in for the parameters marks in the filter clause.  
   
 =item fields  
   
 List of the fields to be returned in each element of the list returned.  
2673    
2674  =item count  =item matchClause
2675    
2676  Maximum number of records to return. If omitted or 0, all available records will be returned.  An optional full-text search clause. If specified, it will be inserted at the
2677    front of the WHERE clause. It should already be SQL-formatted; that is, the
2678    field names should be in the form I<table>C<.>I<fieldName>.
2679    
2680  =item RETURN  =item RETURN
2681    
2682  Returns a list of list references. Each element of the return list contains the values for the  Returns a three-element list. The first element is the SQL statement suffix, beginning
2683  fields specified in the B<fields> parameter.  with the FROM clause. The second element is a reference to a list of the names to be
2684    used in retrieving the fields. The third element is a hash mapping the names to the
2685    objects they represent.
2686    
2687  =back  =back
2688    
2689  =cut  =cut
2690  #: Return Type @@;  
2691  sub GetAll {  sub _SetupSQL {
2692      # Get the parameters.      my ($self, $objectNames, $filterClause, $matchClause) = @_;
2693      my ($self, $objectNames, $filterClause, $parameterList, $fields, $count) = @_;      # Adjust the list of object names to account for multiple occurrences of the
2694      # Translate the parameters from a list reference to a list. If the parameter      # same object. We start with a hash table keyed on object name that will
2695      # list is a scalar we convert it into a singleton list.      # return the object suffix. The first time an object is encountered it will
2696      my @parmList = ();      # not be found in the hash. The next time the hash will map the object name
2697      if (ref $parameterList eq "ARRAY") {      # to 2, then 3, and so forth.
2698          @parmList = @{$parameterList};      my %objectHash = ();
2699        # This list will contain the object names as they are to appear in the
2700        # FROM list.
2701        my @fromList = ();
2702        # This list contains the suffixed object name for each object. It is exactly
2703        # parallel to the list in the $objectNames parameter.
2704        my @mappedNameList = ();
2705        # Finally, this hash translates from a mapped name to its original object name.
2706        my %mappedNameHash = ();
2707        # Now we create the lists. Note that for every single name we push something into
2708        # @fromList and @mappedNameList. This insures that those two arrays are exactly
2709        # parallel to $objectNames.
2710        for my $objectName (@{$objectNames}) {
2711            # Get the next suffix for this object.
2712            my $suffix = $objectHash{$objectName};
2713            if (! $suffix) {
2714                # Here we are seeing the object for the first time. The object name
2715                # is used as is.
2716                push @mappedNameList, $objectName;
2717                push @fromList, $objectName;
2718                $mappedNameHash{$objectName} = $objectName;
2719                # Denote the next suffix will be 2.
2720                $objectHash{$objectName} = 2;
2721      } else {      } else {
2722          push @parmList, $parameterList;              # Here we've seen the object before. We construct a new name using
2723                # the suffix from the hash and update the hash.
2724                my $mappedName = "$objectName$suffix";
2725                $objectHash{$objectName} = $suffix + 1;
2726                # The FROM list has the object name followed by the mapped name. This
2727                # tells SQL it's still the same table, but we're using a different name
2728                # for it to avoid confusion.
2729                push @fromList, "$objectName $mappedName";
2730                # The mapped-name list contains the real mapped name.
2731                push @mappedNameList, $mappedName;
2732                # Finally, enable us to get back from the mapped name to the object name.
2733                $mappedNameHash{$mappedName} = $objectName;
2734      }      }
     # Create the query.  
     my $query = $self->Get($objectNames, $filterClause, @parmList);  
     # Set up a counter of the number of records read.  
     my $fetched = 0;  
     # Insure the counter has a value.  
     if (!defined $count) {  
         $count = 0;  
2735      }      }
2736      # Loop through the records returned, extracting the fields. Note that if the      # Begin the SELECT suffix. It starts with
2737      # counter is non-zero, we stop when the number of records read hits the count.      #
2738      my @retVal = ();      # FROM name1, name2, ... nameN
2739      while (($count == 0 || $fetched < $count) && (my $row = $query->Fetch())) {      #
2740          my @rowData = $row->Values($fields);      my $suffix = "FROM " . join(', ', @fromList);
2741          push @retVal, \@rowData;      # Now for the WHERE. First, we need a place for the filter string.
2742          $fetched++;      my $filterString = "";
2743        # We will also keep a list of conditions to add to the WHERE clause in order to link
2744        # entities and relationships as well as primary relations to secondary ones.
2745        my @joinWhere = ();
2746        # Check for a filter clause.
2747        if ($filterClause) {
2748            # Here we have one, so we convert its field names and add it to the query. First,
2749            # We create a copy of the filter string we can work with.
2750            $filterString = $filterClause;
2751            # Next, we sort the object names by length. This helps protect us from finding
2752            # object names inside other object names when we're doing our search and replace.
2753            my @sortedNames = sort { length($b) - length($a) } @mappedNameList;
2754            # The final preparatory step is to create a hash table of relation names. The
2755            # table begins with the relation names already in the SELECT command. We may
2756            # need to add relations later if there is filtering on a field in a secondary
2757            # relation. The secondary relations are the ones that contain multiply-
2758            # occurring or optional fields.
2759            my %fromNames = map { $_ => 1 } @sortedNames;
2760            # We are ready to begin. We loop through the object names, replacing each
2761            # object name's field references by the corresponding SQL field reference.
2762            # Along the way, if we find a secondary relation, we will need to add it
2763            # to the FROM clause.
2764            for my $mappedName (@sortedNames) {
2765                # Get the length of the object name plus 2. This is the value we add to the
2766                # size of the field name to determine the size of the field reference as a
2767                # whole.
2768                my $nameLength = 2 + length $mappedName;
2769                # Get the real object name for this mapped name.
2770                my $objectName = $mappedNameHash{$mappedName};
2771                Trace("Processing $mappedName for object $objectName.") if T(4);
2772                # Get the object's field list.
2773                my $fieldList = $self->GetFieldTable($objectName);
2774                # Find the field references for this object.
2775                while ($filterString =~ m/$mappedName\(([^)]*)\)/g) {
2776                    # At this point, $1 contains the field name, and the current position
2777                    # is set immediately after the final parenthesis. We pull out the name of
2778                    # the field and the position and length of the field reference as a whole.
2779                    my $fieldName = $1;
2780                    my $len = $nameLength + length $fieldName;
2781                    my $pos = pos($filterString) - $len;
2782                    # Insure the field exists.
2783                    if (!exists $fieldList->{$fieldName}) {
2784                        Confess("Field $fieldName not found for object $objectName.");
2785                    } else {
2786                        Trace("Processing $fieldName at position $pos.") if T(4);
2787                        # Get the field's relation.
2788                        my $relationName = $fieldList->{$fieldName}->{relation};
2789                        # Now we have a secondary relation. We need to insure it matches the
2790                        # mapped name of the primary relation. First we peel off the suffix
2791                        # from the mapped name.
2792                        my $mappingSuffix = substr $mappedName, length($objectName);
2793                        # Put the mapping suffix onto the relation name to get the
2794                        # mapped relation name.
2795                        my $mappedRelationName = "$relationName$mappingSuffix";
2796                        # Insure the relation is in the FROM clause.
2797                        if (!exists $fromNames{$mappedRelationName}) {
2798                            # Add the relation to the FROM clause.
2799                            if ($mappedRelationName eq $relationName) {
2800                                # The name is un-mapped, so we add it without
2801                                # any frills.
2802                                $suffix .= ", $relationName";
2803                                push @joinWhere, "$objectName.id = $relationName.id";
2804                            } else {
2805                                # Here we have a mapping situation.
2806                                $suffix .= ", $relationName $mappedRelationName";
2807                                push @joinWhere, "$mappedRelationName.id = $mappedName.id";
2808                            }
2809                            # Denote we have this relation available for future fields.
2810                            $fromNames{$mappedRelationName} = 1;
2811                        }
2812                        # Form an SQL field reference from the relation name and the field name.
2813                        my $sqlReference = "$mappedRelationName." . _FixName($fieldName);
2814                        # Put it into the filter string in place of the old value.
2815                        substr($filterString, $pos, $len) = $sqlReference;
2816                        # Reposition the search.
2817                        pos $filterString = $pos + length $sqlReference;
2818                    }
2819      }      }
2820      # Return the resulting list.          }
2821      return @retVal;      }
2822        # The next step is to join the objects together. We only need to do this if there
2823        # is more than one object in the object list. We start with the first object and
2824        # run through the objects after it. Note also that we make a safety copy of the
2825        # list before running through it, because we shift off the first object before
2826        # processing the rest.
2827        my @mappedObjectList = @mappedNameList;
2828        my $lastMappedObject = shift @mappedObjectList;
2829        # Get the join table.
2830        my $joinTable = $self->{_metaData}->{Joins};
2831        # Loop through the object list.
2832        for my $thisMappedObject (@mappedObjectList) {
2833            # Look for a join using the real object names.
2834            my $lastObject = $mappedNameHash{$lastMappedObject};
2835            my $thisObject = $mappedNameHash{$thisMappedObject};
2836            my $joinKey = "$lastObject/$thisObject";
2837            if (!exists $joinTable->{$joinKey}) {
2838                # Here there's no join, so we throw an error.
2839                Confess("No join exists to connect from $lastMappedObject to $thisMappedObject.");
2840            } else {
2841                # Get the join clause.
2842                my $unMappedJoin = $joinTable->{$joinKey};
2843                # Fix the names.
2844                $unMappedJoin =~ s/$lastObject/$lastMappedObject/;
2845                $unMappedJoin =~ s/$thisObject/$thisMappedObject/;
2846                push @joinWhere, $unMappedJoin;
2847                # Save this object as the last object for the next iteration.
2848                $lastMappedObject = $thisMappedObject;
2849            }
2850        }
2851        # Now we need to handle the whole ORDER BY / LIMIT thing. The important part
2852        # here is we want the filter clause to be empty if there's no WHERE filter.
2853        # We'll put the ORDER BY / LIMIT clauses in the following variable.
2854        my $orderClause = "";
2855        # This is only necessary if we have a filter string in which the ORDER BY
2856        # and LIMIT clauses can live.
2857        if ($filterString) {
2858            # Locate the ORDER BY or LIMIT verbs (if any). We use a non-greedy
2859            # operator so that we find the first occurrence of either verb.
2860            if ($filterString =~ m/^(.*?)\s*(ORDER BY|LIMIT)/g) {
2861                # Here we have an ORDER BY or LIMIT verb. Split it off of the filter string.
2862                my $pos = pos $filterString;
2863                $orderClause = $2 . substr($filterString, $pos);
2864                $filterString = $1;
2865            }
2866        }
2867        # All the things that are supposed to be in the WHERE clause of the
2868        # SELECT command need to be put into @joinWhere so we can string them
2869        # together. We begin with the match clause. This is important,
2870        # because the match clause's parameter mark must precede any parameter
2871        # marks in the filter string.
2872        if ($matchClause) {
2873            push @joinWhere, $matchClause;
2874        }
2875        # Add the filter string. We put it in parentheses to avoid operator
2876        # precedence problems with the match clause or the joins.
2877        if ($filterString) {
2878            Trace("Filter string is \"$filterString\".") if T(4);
2879            push @joinWhere, "($filterString)";
2880        }
2881        # String it all together into a big filter clause.
2882        if (@joinWhere) {
2883            $suffix .= " WHERE " . join(' AND ', @joinWhere);
2884        }
2885        # Add the sort or limit clause (if any).
2886        if ($orderClause) {
2887            $suffix .= " $orderClause";
2888        }
2889        # Return the suffix, the mapped name list, and the mapped name hash.
2890        return ($suffix, \@mappedNameList, \%mappedNameHash);
2891  }  }
2892    
2893  =head3 EstimateRowSize  =head3 _GetStatementHandle
2894    
2895  C<< my $rowSize = $erdb->EstimateRowSize($relName); >>  This method will prepare and execute an SQL query, returning the statement handle.
2896    The main reason for doing this here is so that everybody who does SQL queries gets
2897    the benefit of tracing.
2898    
2899  Estimate the row size of the specified relation. The estimated row size is computed by adding  This is an instance method.
 up the average length for each data type.  
2900    
2901  =over 4  =over 4
2902    
2903  =item relName  =item command
2904    
2905  Name of the relation whose estimated row size is desired.  Command to prepare and execute.
2906    
2907    =item params
2908    
2909    Reference to a list of the values to be substituted in for the parameter marks.
2910    
2911  =item RETURN  =item RETURN
2912    
2913  Returns an estimate of the row size for the specified relation.  Returns a prepared and executed statement handle from which the caller can extract
2914    results.
2915    
2916  =back  =back
2917    
2918  =cut  =cut
2919  #: Return Type $;  
2920  sub EstimateRowSize {  sub _GetStatementHandle {
2921      # Get the parameters.      # Get the parameters.
2922      my ($self, $relName) = @_;      my ($self, $command, $params) = @_;
2923      # Declare the return variable.      # Trace the query.
2924      my $retVal = 0;      Trace("SQL query: $command") if T(SQL => 3);
2925      # Find the relation descriptor.      Trace("PARMS: '" . (join "', '", @{$params}) . "'") if (T(SQL => 4) && (@{$params} > 0));
2926      my $relation = $self->_FindRelation($relName);      # Get the database handle.
2927      # Get the list of fields.      my $dbh = $self->{_dbh};
2928      for my $fieldData (@{$relation->{Fields}}) {      # Prepare the command.
2929          # Get the field type and add its length.      my $sth = $dbh->prepare_command($command);
2930          my $fieldLen = $TypeTable{$fieldData->{type}}->{avgLen};      # Execute it with the parameters bound in.
2931          $retVal += $fieldLen;      $sth->execute(@{$params}) || Confess("SELECT error" . $sth->errstr());
2932      }      # Return the statement handle.
2933      # Return the result.      return $sth;
     return $retVal;  
2934  }  }
2935    
2936  =head2 Internal Utility Methods  =head3 _GetLoadStats
   
 =head3 GetLoadStats  
2937    
2938  Return a blank statistics object for use by the load methods.  Return a blank statistics object for use by the load methods.
2939    
# Line 1633  Line 2942 
2942  =cut  =cut
2943    
2944  sub _GetLoadStats {  sub _GetLoadStats {
2945      return Stats->new('records');      return Stats->new();
2946  }  }
2947    
2948  =head3 GenerateFields  =head3 _GenerateFields
2949    
2950  Generate field values from a field structure and store in a specified table. The field names  Generate field values from a field structure and store in a specified table. The field names
2951  are first sorted by pass count, certain pre-defined fields are removed from the list, and  are first sorted by pass count, certain pre-defined fields are removed from the list, and
# Line 1710  Line 3019 
3019      }      }
3020  }  }
3021    
3022  =head3 DumpRelation  =head3 _DumpRelation
3023    
3024  Dump the specified relation's to the specified output file in tab-delimited format.  Dump the specified relation's to the specified output file in tab-delimited format.
3025    
# Line 1760  Line 3069 
3069      close DTXOUT;      close DTXOUT;
3070  }  }
3071    
3072  =head3 GetStructure  =head3 _GetStructure
3073    
3074  Get the data structure for a specified entity or relationship.  Get the data structure for a specified entity or relationship.
3075    
# Line 1799  Line 3108 
3108      return $retVal;      return $retVal;
3109  }  }
3110    
3111  =head3 GetRelationTable  
3112    
3113    =head3 _GetRelationTable
3114    
3115  Get the list of relations for a specified entity or relationship.  Get the list of relations for a specified entity or relationship.
3116    
# Line 1828  Line 3139 
3139      return $objectData->{Relations};      return $objectData->{Relations};
3140  }  }
3141    
3142  =head3 GetFieldTable  =head3 _ValidateFieldNames
   
 Get the field structure for a specified entity or relationship.  
   
 This is an instance method.  
   
 =over 4  
   
 =item objectName  
   
 Name of the desired entity or relationship.  
   
 =item RETURN  
   
 The table containing the field descriptors for the specified object.  
   
 =back  
   
 =cut  
   
 sub _GetFieldTable {  
     # Get the parameters.  
     my ($self, $objectName) = @_;  
     # Get the descriptor from the metadata.  
     my $objectData = $self->_GetStructure($objectName);  
     # Return the object's field table.  
     return $objectData->{Fields};  
 }  
   
 =head3 ValidateFieldNames  
3143    
3144  Determine whether or not the field names are valid. A description of the problems with the names  Determine whether or not the field names are valid. A description of the problems with the names
3145  will be written to the standard error output. If there is an error, this method will abort. This is  will be written to the standard error output. If there is an error, this method will abort. This is
# Line 1912  Line 3194 
3194      }      }
3195  }  }
3196    
3197  =head3 LoadRelation  =head3 _LoadRelation
3198    
3199  Load a relation from the data in a tab-delimited disk file. The load will only take place if a disk  Load a relation from the data in a tab-delimited disk file. The load will only take place if a disk
3200  file with the same name as the relation exists in the specified directory.  file with the same name as the relation exists in the specified directory.
# Line 1972  Line 3254 
3254      return $retVal;      return $retVal;
3255  }  }
3256    
3257  =head3 LoadMetaData  =head3 _LoadMetaData
3258    
3259  This method loads the data describing this database from an XML file into a metadata structure.  This method loads the data describing this database from an XML file into a metadata structure.
3260  The resulting structure is a set of nested hash tables containing all the information needed to  The resulting structure is a set of nested hash tables containing all the information needed to
# Line 2210  Line 3492 
3492              # Determine if this relationship has our entity in one of its link fields.              # Determine if this relationship has our entity in one of its link fields.
3493              my $fromEntity = $relationship->{from};              my $fromEntity = $relationship->{from};
3494              my $toEntity = $relationship->{to};              my $toEntity = $relationship->{to};
3495              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(4);              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(Joins => 4);
3496              if ($fromEntity eq $entityName) {              if ($fromEntity eq $entityName) {
3497                  if ($toEntity eq $entityName) {                  if ($toEntity eq $entityName) {
3498                      # Here the relationship is recursive.                      # Here the relationship is recursive.
# Line 2299  Line 3581 
3581      return $metadata;      return $metadata;
3582  }  }
3583    
3584  =head3 CreateRelationshipIndex  =head3 _CreateRelationshipIndex
3585    
3586  Create an index for a relationship's relation.  Create an index for a relationship's relation.
3587    
# Line 2344  Line 3626 
3626      _AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex);      _AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex);
3627  }  }
3628    
3629  =head3 AddIndex  =head3 _AddIndex
3630    
3631  Add an index to a relation structure.  Add an index to a relation structure.
3632    
# Line 2390  Line 3672 
3672      $relationStructure->{Indexes}->{$indexName} = $newIndex;      $relationStructure->{Indexes}->{$indexName} = $newIndex;
3673  }  }
3674    
3675  =head3 FixupFields  =head3 _FixupFields
3676    
3677  This method fixes the field list for an entity or relationship. It will add the caller-specified  This method fixes the field list for an entity or relationship. It will add the caller-specified
3678  relation name to fields that do not have a name and set the C<PrettySort> value as specified.  relation name to fields that do not have a name and set the C<PrettySort> value as specified.
# Line 2428  Line 3710 
3710          # Here it doesn't, so we create a new one.          # Here it doesn't, so we create a new one.
3711          $structure->{Fields} = { };          $structure->{Fields} = { };
3712      } else {      } else {
3713          # Here we have a field list. Loop through its fields.          # Here we have a field list. We need to track the searchable fields, so we
3714            # create a list for stashing them.
3715            my @textFields = ();
3716            # Loop through the fields.
3717          my $fieldStructures = $structure->{Fields};          my $fieldStructures = $structure->{Fields};
3718          for my $fieldName (keys %{$fieldStructures}) {          for my $fieldName (keys %{$fieldStructures}) {
3719              Trace("Processing field $fieldName of $defaultRelationName.") if T(4);              Trace("Processing field $fieldName of $defaultRelationName.") if T(4);
# Line 2442  Line 3727 
3727                  # The data generator will use the default for the field's type.                  # The data generator will use the default for the field's type.
3728                  $fieldData->{DataGen} = { content => $TypeTable{$type}->{dataGen} };                  $fieldData->{DataGen} = { content => $TypeTable{$type}->{dataGen} };
3729              }              }
3730                # Check for searchability.
3731                if ($fieldData->{searchable}) {
3732                    # Only allow this for a primary relation.
3733                    if ($fieldData->{relation} ne $defaultRelationName) {
3734                        Confess("Field $fieldName of $defaultRelationName is in secondary relations and cannot be searchable.");
3735                    } else {
3736                        push @textFields, $fieldName;
3737                    }
3738                }
3739              # Plug in the defaults for the optional data generation parameters.              # Plug in the defaults for the optional data generation parameters.
3740              Tracer::MergeOptions($fieldData->{DataGen}, { testCount => 1, pass => 0 });              Tracer::MergeOptions($fieldData->{DataGen}, { testCount => 1, pass => 0 });
3741              # Add the PrettySortValue.              # Add the PrettySortValue.
3742              $fieldData->{PrettySort} = (($type eq "text") ? $textPrettySortValue : $prettySortValue);              $fieldData->{PrettySort} = (($type eq "text") ? $textPrettySortValue : $prettySortValue);
3743          }          }
3744            # If there are searchable fields, remember the fact.
3745            if (@textFields) {
3746                $structure->{searchFields} = \@textFields;
3747            }
3748      }      }
3749  }  }
3750    
3751  =head3 FixName  =head3 _FixName
3752    
3753  Fix the incoming field name so that it is a legal SQL column name.  Fix the incoming field name so that it is a legal SQL column name.
3754    
# Line 2479  Line 3777 
3777      return $fieldName;      return $fieldName;
3778  }  }
3779    
3780  =head3 FixNames  =head3 _FixNames
3781    
3782  Fix all the field names in a list.  Fix all the field names in a list.
3783    
# Line 2510  Line 3808 
3808      return @result;      return @result;
3809  }  }
3810    
3811  =head3 AddField  =head3 _AddField
3812    
3813  Add a field to a field list.  Add a field to a field list.
3814    
# Line 2545  Line 3843 
3843      $fieldList->{$fieldName} = $fieldStructure;      $fieldList->{$fieldName} = $fieldStructure;
3844  }  }
3845    
3846  =head3 ReOrderRelationTable  =head3 _ReOrderRelationTable
3847    
3848  This method will take a relation table and re-sort it according to the implicit ordering of the  This method will take a relation table and re-sort it according to the implicit ordering of the
3849  C<PrettySort> property. Instead of a hash based on field names, it will return a list of fields.  C<PrettySort> property. Instead of a hash based on field names, it will return a list of fields.
# Line 2606  Line 3904 
3904    
3905  }  }
3906    
3907  =head3 IsPrimary  =head3 _IsPrimary
3908    
3909  Return TRUE if a specified relation is a primary relation, else FALSE. A relation is primary  Return TRUE if a specified relation is a primary relation, else FALSE. A relation is primary
3910  if it has the same name as an entity or relationship.  if it has the same name as an entity or relationship.
# Line 2642  Line 3940 
3940      return $retVal;      return $retVal;
3941  }  }
3942    
3943  =head3 FindRelation  =head3 _FindRelation
3944    
3945  Return the descriptor for the specified relation.  Return the descriptor for the specified relation.
3946    
# Line 2673  Line 3971 
3971    
3972  =head2 HTML Documentation Utility Methods  =head2 HTML Documentation Utility Methods
3973    
3974  =head3 ComputeRelationshipSentence  =head3 _ComputeRelationshipSentence
3975    
3976  The relationship sentence consists of the relationship name between the names of the  The relationship sentence consists of the relationship name between the names of the
3977  two related entities and an arity indicator.  two related entities and an arity indicator.
# Line 2711  Line 4009 
4009      return $result;      return $result;
4010  }  }
4011    
4012  =head3 ComputeRelationshipHeading  =head3 _ComputeRelationshipHeading
4013    
4014  The relationship heading is the L<relationship sentence|/ComputeRelationshipSentence> with the entity  The relationship heading is the L<relationship sentence|/ComputeRelationshipSentence> with the entity
4015  names hyperlinked to the appropriate entity sections of the document.  names hyperlinked to the appropriate entity sections of the document.
# Line 2748  Line 4046 
4046      return $result;      return $result;
4047  }  }
4048    
4049  =head3 ShowRelationTable  =head3 _ShowRelationTable
4050    
4051  Generate the HTML string for a particular relation. The relation's data will be formatted as an HTML  Generate the HTML string for a particular relation. The relation's data will be formatted as an HTML
4052  table with three columns-- the field name, the field type, and the field description.  table with three columns-- the field name, the field type, and the field description.
# Line 2809  Line 4107 
4107      $htmlString .= "</ul>\n";      $htmlString .= "</ul>\n";
4108  }  }
4109    
4110  =head3 OpenFieldTable  =head3 _OpenFieldTable
4111    
4112  This method creates the header string for the field table generated by L</ShowMetaData>.  This method creates the header string for the field table generated by L</ShowMetaData>.
4113    
# Line 2834  Line 4132 
4132      return _OpenTable($tablename, 'Field', 'Type', 'Description');      return _OpenTable($tablename, 'Field', 'Type', 'Description');
4133  }  }
4134    
4135  =head3 OpenTable  =head3 _OpenTable
4136    
4137  This method creates the header string for an HTML table.  This method creates the header string for an HTML table.
4138    
# Line 2874  Line 4172 
4172      return $htmlString;      return $htmlString;
4173  }  }
4174    
4175  =head3 CloseTable  =head3 _CloseTable
4176    
4177  This method returns the HTML for closing a table.  This method returns the HTML for closing a table.
4178    
# Line 2886  Line 4184 
4184      return "</table></p>\n";      return "</table></p>\n";
4185  }  }
4186    
4187  =head3 ShowField  =head3 _ShowField
4188    
4189  This method returns the HTML for displaying a row of field information in a field table.  This method returns the HTML for displaying a row of field information in a field table.
4190    
# Line 2921  Line 4219 
4219      return $htmlString;      return $htmlString;
4220  }  }
4221    
4222  =head3 HTMLNote  =head3 _HTMLNote
4223    
4224  Convert a note or comment to HTML by replacing some bulletin-board codes with HTML. The codes  Convert a note or comment to HTML by replacing some bulletin-board codes with HTML. The codes
4225  supported are C<[b]> for B<bold>, C<[i]> for I<italics>, and C<[p]> for a new paragraph.  supported are C<[b]> for B<bold>, C<[i]> for I<italics>, and C<[p]> for a new paragraph.

Legend:
Removed from v.1.23  
changed lines
  Added in v.1.70

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3