[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.14, Mon Jun 27 15:41:24 2005 UTC revision 1.71, Sat Oct 14 18:08:12 2006 UTC
# Line 9  Line 9 
9      use DBObject;      use DBObject;
10      use Stats;      use Stats;
11      use Time::HiRes qw(gettimeofday);      use Time::HiRes qw(gettimeofday);
12        use Digest::MD5 qw(md5_base64);
13        use FIG;
14    
15  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
16    
# Line 89  Line 91 
91    
92  32-bit signed integer  32-bit signed integer
93    
94    =item counter
95    
96    32-bit unsigned integer
97    
98  =item date  =item date
99    
100  64-bit unsigned integer, representing a PERL date/time value  64-bit unsigned integer, representing a PERL date/time value
# Line 108  Line 114 
114  compatibility with certain database packages), but the only values supported are  compatibility with certain database packages), but the only values supported are
115  0 and 1.  0 and 1.
116    
117    =item id-string
118    
119    variable-length string, maximum 25 characters
120    
121  =item key-string  =item key-string
122    
123  variable-length string, maximum 40 characters  variable-length string, maximum 40 characters
# Line 124  Line 134 
134    
135  variable-length string, maximum 255 characters  variable-length string, maximum 255 characters
136    
137    =item hash-string
138    
139    variable-length string, maximum 22 characters
140    
141  =back  =back
142    
143    The hash-string data type has a special meaning. The actual key passed into the loader will
144    be a string, but it will be digested into a 22-character MD5 code to save space. Although the
145    MD5 algorithm is not perfect, it is extremely unlikely two strings will have the same
146    digest. Therefore, it is presumed the keys will be unique. When the database is actually
147    in use, the hashed keys will be presented rather than the original values. For this reason,
148    they should not be used for entities where the key is meaningful.
149    
150  =head3 Global Tags  =head3 Global Tags
151    
152  The entire database definition must be inside a B<Database> tag. The display name of  The entire database definition must be inside a B<Database> tag. The display name of
# Line 169  Line 190 
190    
191  Name of the field. The field name should contain only letters, digits, and hyphens (C<->),  Name of the field. The field name should contain only letters, digits, and hyphens (C<->),
192  and the first character should be a letter. Most underlying databases are case-insensitive  and the first character should be a letter. Most underlying databases are case-insensitive
193  with respect to field names, so a best practice is to use lower-case letters only.  with respect to field names, so a best practice is to use lower-case letters only. Finally,
194    the name C<search-relevance> has special meaning for full-text searches and should not be
195    used as a field name.
196    
197  =item type  =item type
198    
# Line 188  Line 211 
211  entity, the fields without a relation attribute are said to belong to the  entity, the fields without a relation attribute are said to belong to the
212  I<primary relation>. This relation has the same name as the entity itself.  I<primary relation>. This relation has the same name as the entity itself.
213    
214    =item searchable
215    
216    If specified, then the field is a candidate for full-text searching. A single full-text
217    index will be created for each relation with at least one searchable field in it.
218    For best results, this option should only be used for string or text fields.
219    
220  =back  =back
221    
222  =head3 Indexes  =head3 Indexes
# Line 300  Line 329 
329  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.
330  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field
331  # of the specified type. "dataGen" is a PERL string that will be evaluated if no test data generation  # of the specified type. "dataGen" is a PERL string that will be evaluated if no test data generation
332   #string is specified in the field definition.  # string is specified in the field definition. "avgLen" is the average byte length for estimating
333  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            dataGen => "StringGen('A')" },  # record sizes. "sort" is the key modifier for the sort command.
334                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           dataGen => "IntGen(0, 99999999)" },  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            avgLen =>   1, sort => "",  dataGen => "StringGen('A')" },
335                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          dataGen => "StringGen(IntGen(10,250))" },                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           avgLen =>   4, sort => "n", dataGen => "IntGen(0, 99999999)" },
336                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   dataGen => "StringGen(IntGen(80,1000))" },                    counter => { sqlType => 'INTEGER UNSIGNED',   maxLen => 20,           avgLen =>   4, sort => "n", dataGen => "IntGen(0, 99999999)" },
337                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           dataGen => "DateGen(-7, 7, IntGen(0,1400))" },                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          avgLen => 100, sort => "",  dataGen => "StringGen(IntGen(10,250))" },
338                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           dataGen => "FloatGen(0.0, 100.0)" },                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   avgLen => 500, sort => "",  dataGen => "StringGen(IntGen(80,1000))" },
339                    boolean => { sqlType => 'SMALLINT',           maxLen => 1,            dataGen => "IntGen(0, 1)" },                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, sort => "n", dataGen => "DateGen(-7, 7, IntGen(0,1400))" },
340                      float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, sort => "g", dataGen => "FloatGen(0.0, 100.0)" },
341                      boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   1, sort => "n", dataGen => "IntGen(0, 1)" },
342                     'hash-string' =>
343                                 { sqlType => 'VARCHAR(22)',        maxLen => 22,           avgLen =>  22, sort => "",  dataGen => "SringGen(22)" },
344                     'id-string' =>
345                                 { sqlType => 'VARCHAR(25)',        maxLen => 25,           avgLen =>  25, sort => "",  dataGen => "SringGen(22)" },
346                   'key-string' =>                   'key-string' =>
347                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           dataGen => "StringGen(IntGen(10,40))" },                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, sort => "",  dataGen => "StringGen(IntGen(10,40))" },
348                   'name-string' =>                   'name-string' =>
349                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           dataGen => "StringGen(IntGen(10,80))" },                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           avgLen =>  40, sort => "",  dataGen => "StringGen(IntGen(10,80))" },
350                   'medium-string' =>                   'medium-string' =>
351                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          dataGen => "StringGen(IntGen(10,160))" },                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          avgLen =>  40, sort => "",  dataGen => "StringGen(IntGen(10,160))" },
352                  );                  );
353    
354  # Table translating arities into natural language.  # Table translating arities into natural language.
# Line 369  Line 404 
404    
405  =head3 ShowMetaData  =head3 ShowMetaData
406    
407  C<< $database->ShowMetaData($fileName); >>  C<< $erdb->ShowMetaData($fileName); >>
408    
409  This method outputs a description of the database. This description can be used to help users create  This method outputs a description of the database. This description can be used to help users create
410  the data to be loaded into the relations.  the data to be loaded into the relations.
# Line 400  Line 435 
435      # Write the HTML heading stuff.      # Write the HTML heading stuff.
436      print HTMLOUT "<html>\n<head>\n<title>$title</title>\n";      print HTMLOUT "<html>\n<head>\n<title>$title</title>\n";
437      print HTMLOUT "</head>\n<body>\n";      print HTMLOUT "</head>\n<body>\n";
438        # Write the documentation.
439        print HTMLOUT $self->DisplayMetaData();
440        # Close the document.
441        print HTMLOUT "</body>\n</html>\n";
442        # Close the file.
443        close HTMLOUT;
444    }
445    
446    =head3 DisplayMetaData
447    
448    C<< my $html = $erdb->DisplayMetaData(); >>
449    
450    Return an HTML description of the database. This description can be used to help users create
451    the data to be loaded into the relations and form queries. The output is raw includable HTML
452    without any HEAD or BODY tags.
453    
454    =over 4
455    
456    =item RETURN
457    
458    Returns a string containing the HTML description of the database.
459    
460    =back
461    
462    =cut
463    
464    sub DisplayMetaData {
465        # Get the parameters.
466        my ($self) = @_;
467        # Get the metadata and the title string.
468        my $metadata = $self->{_metaData};
469        # Get the title string.
470        my $title = $metadata->{Title};
471        # Get the entity and relationship lists.
472        my $entityList = $metadata->{Entities};
473        my $relationshipList = $metadata->{Relationships};
474        # Declare the return variable.
475        my $retVal = "";
476        # Open the output file.
477        Trace("Building MetaData table of contents.") if T(4);
478      # Here we do the table of contents. It starts as an unordered list of section names. Each      # Here we do the table of contents. It starts as an unordered list of section names. Each
479      # section contains an ordered list of entity or relationship subsections.      # section contains an ordered list of entity or relationship subsections.
480      print HTMLOUT "<ul>\n<li><a href=\"#EntitiesSection\">Entities</a>\n<ol>\n";      $retVal .= "<ul>\n<li><a href=\"#EntitiesSection\">Entities</a>\n<ol>\n";
481      # Loop through the Entities, displaying a list item for each.      # Loop through the Entities, displaying a list item for each.
482      foreach my $key (sort keys %{$entityList}) {      foreach my $key (sort keys %{$entityList}) {
483          # Display this item.          # Display this item.
484          print HTMLOUT "<li><a href=\"#$key\">$key</a></li>\n";          $retVal .= "<li><a href=\"#$key\">$key</a></li>\n";
485      }      }
486      # Close off the entity section and start the relationship section.      # Close off the entity section and start the relationship section.
487      print HTMLOUT "</ol></li>\n<li><a href=\"#RelationshipsSection\">Relationships</a>\n<ol>\n";      $retVal .= "</ol></li>\n<li><a href=\"#RelationshipsSection\">Relationships</a>\n<ol>\n";
488      # Loop through the Relationships.      # Loop through the Relationships.
489      foreach my $key (sort keys %{$relationshipList}) {      foreach my $key (sort keys %{$relationshipList}) {
490          # Display this item.          # Display this item.
491          my $relationshipTitle = _ComputeRelationshipSentence($key, $relationshipList->{$key});          my $relationshipTitle = _ComputeRelationshipSentence($key, $relationshipList->{$key});
492          print HTMLOUT "<li><a href=\"#$key\">$relationshipTitle</a></li>\n";          $retVal .= "<li><a href=\"#$key\">$relationshipTitle</a></li>\n";
493      }      }
494      # Close off the relationship section and list the join table section.      # Close off the relationship section and list the join table section.
495      print HTMLOUT "</ol></li>\n<li><a href=\"#JoinTable\">Join Table</a></li>\n";      $retVal .= "</ol></li>\n<li><a href=\"#JoinTable\">Join Table</a></li>\n";
496      # Close off the table of contents itself.      # Close off the table of contents itself.
497      print HTMLOUT "</ul>\n";      $retVal .=  "</ul>\n";
498      # Now we start with the actual data. Denote we're starting the entity section.      # Now we start with the actual data. Denote we're starting the entity section.
499      print HTMLOUT "<a name=\"EntitiesSection\"></a><h2>Entities</h2>\n";      $retVal .= "<a name=\"EntitiesSection\"></a><h2>Entities</h2>\n";
500      # Loop through the entities.      # Loop through the entities.
501      for my $key (sort keys %{$entityList}) {      for my $key (sort keys %{$entityList}) {
502          Trace("Building MetaData entry for $key entity.") if T(4);          Trace("Building MetaData entry for $key entity.") if T(4);
503          # Create the entity header. It contains a bookmark and the entity name.          # Create the entity header. It contains a bookmark and the entity name.
504          print HTMLOUT "<a name=\"$key\"></a><h3>$key</h3>\n";          $retVal .= "<a name=\"$key\"></a><h3>$key</h3>\n";
505          # Get the entity data.          # Get the entity data.
506          my $entityData = $entityList->{$key};          my $entityData = $entityList->{$key};
507          # If there's descriptive text, display it.          # If there's descriptive text, display it.
508          if (my $notes = $entityData->{Notes}) {          if (my $notes = $entityData->{Notes}) {
509              print HTMLOUT "<p>" . _HTMLNote($notes->{content}) . "</p>\n";              $retVal .= "<p>" . _HTMLNote($notes->{content}) . "</p>\n";
510          }          }
511          # Now we want a list of the entity's relationships. First, we set up the relationship subsection.          # Now we want a list of the entity's relationships. First, we set up the relationship subsection.
512          print HTMLOUT "<h4>Relationships for <b>$key</b></h4>\n<ul>\n";          $retVal .= "<h4>Relationships for <b>$key</b></h4>\n<ul>\n";
513          # Loop through the relationships.          # Loop through the relationships.
514          for my $relationship (sort keys %{$relationshipList}) {          for my $relationship (sort keys %{$relationshipList}) {
515              # Get the relationship data.              # Get the relationship data.
# Line 444  Line 519 
519                  # Get the relationship sentence and append the arity.                  # Get the relationship sentence and append the arity.
520                  my $relationshipDescription = _ComputeRelationshipSentence($relationship, $relationshipStructure);                  my $relationshipDescription = _ComputeRelationshipSentence($relationship, $relationshipStructure);
521                  # Display the relationship data.                  # Display the relationship data.
522                  print HTMLOUT "<li><a href=\"#$relationship\">$relationshipDescription</a></li>\n";                  $retVal .= "<li><a href=\"#$relationship\">$relationshipDescription</a></li>\n";
523              }              }
524          }          }
525          # Close off the relationship list.          # Close off the relationship list.
526          print HTMLOUT "</ul>\n";          $retVal .= "</ul>\n";
527          # Get the entity's relations.          # Get the entity's relations.
528          my $relationList = $entityData->{Relations};          my $relationList = $entityData->{Relations};
529          # Create a header for the relation subsection.          # Create a header for the relation subsection.
530          print HTMLOUT "<h4>Relations for <b>$key</b></h4>\n";          $retVal .= "<h4>Relations for <b>$key</b></h4>\n";
531          # Loop through the relations, displaying them.          # Loop through the relations, displaying them.
532          for my $relation (sort keys %{$relationList}) {          for my $relation (sort keys %{$relationList}) {
533              my $htmlString = _ShowRelationTable($relation, $relationList->{$relation});              my $htmlString = _ShowRelationTable($relation, $relationList->{$relation});
534              print HTMLOUT $htmlString;              $retVal .= $htmlString;
535          }          }
536      }      }
537      # Denote we're starting the relationship section.      # Denote we're starting the relationship section.
538      print HTMLOUT "<a name=\"RelationshipsSection\"></a><h2>Relationships</h2>\n";      $retVal .= "<a name=\"RelationshipsSection\"></a><h2>Relationships</h2>\n";
539      # Loop through the relationships.      # Loop through the relationships.
540      for my $key (sort keys %{$relationshipList}) {      for my $key (sort keys %{$relationshipList}) {
541          Trace("Building MetaData entry for $key relationship.") if T(4);          Trace("Building MetaData entry for $key relationship.") if T(4);
# Line 468  Line 543 
543          my $relationshipStructure = $relationshipList->{$key};          my $relationshipStructure = $relationshipList->{$key};
544          # Create the relationship header.          # Create the relationship header.
545          my $headerText = _ComputeRelationshipHeading($key, $relationshipStructure);          my $headerText = _ComputeRelationshipHeading($key, $relationshipStructure);
546          print HTMLOUT "<h3><a name=\"$key\"></a>$headerText</h3>\n";          $retVal .= "<h3><a name=\"$key\"></a>$headerText</h3>\n";
547          # Get the entity names.          # Get the entity names.
548          my $fromEntity = $relationshipStructure->{from};          my $fromEntity = $relationshipStructure->{from};
549          my $toEntity = $relationshipStructure->{to};          my $toEntity = $relationshipStructure->{to};
# Line 478  Line 553 
553          # since both sentences will say the same thing.          # since both sentences will say the same thing.
554          my $arity = $relationshipStructure->{arity};          my $arity = $relationshipStructure->{arity};
555          if ($arity eq "11") {          if ($arity eq "11") {
556              print HTMLOUT "<p>Each <b>$fromEntity</b> relates to at most one <b>$toEntity</b>.\n";              $retVal .= "<p>Each <b>$fromEntity</b> relates to at most one <b>$toEntity</b>.\n";
557          } else {          } else {
558              print HTMLOUT "<p>Each <b>$fromEntity</b> relates to multiple <b>$toEntity</b>s.\n";              $retVal .= "<p>Each <b>$fromEntity</b> relates to multiple <b>$toEntity</b>s.\n";
559              if ($arity eq "MM" && $fromEntity ne $toEntity) {              if ($arity eq "MM" && $fromEntity ne $toEntity) {
560                  print HTMLOUT "Each <b>$toEntity</b> relates to multiple <b>$fromEntity</b>s.\n";                  $retVal .= "Each <b>$toEntity</b> relates to multiple <b>$fromEntity</b>s.\n";
561              }              }
562          }          }
563          print HTMLOUT "</p>\n";          $retVal .= "</p>\n";
564          # If there are notes on this relationship, display them.          # If there are notes on this relationship, display them.
565          if (my $notes = $relationshipStructure->{Notes}) {          if (my $notes = $relationshipStructure->{Notes}) {
566              print HTMLOUT "<p>" . _HTMLNote($notes->{content}) . "</p>\n";              $retVal .= "<p>" . _HTMLNote($notes->{content}) . "</p>\n";
567          }          }
568          # Generate the relationship's relation table.          # Generate the relationship's relation table.
569          my $htmlString = _ShowRelationTable($key, $relationshipStructure->{Relations}->{$key});          my $htmlString = _ShowRelationTable($key, $relationshipStructure->{Relations}->{$key});
570          print HTMLOUT $htmlString;          $retVal .= $htmlString;
571      }      }
572      Trace("Building MetaData join table.") if T(4);      Trace("Building MetaData join table.") if T(4);
573      # Denote we're starting the join table.      # Denote we're starting the join table.
574      print HTMLOUT "<a name=\"JoinTable\"></a><h3>Join Table</h3>\n";      $retVal .= "<a name=\"JoinTable\"></a><h3>Join Table</h3>\n";
575      # Create a table header.      # Create a table header.
576      print HTMLOUT _OpenTable("Join Table", "Source", "Target", "Join Condition");      $retVal .= _OpenTable("Join Table", "Source", "Target", "Join Condition");
577      # Loop through the joins.      # Loop through the joins.
578      my $joinTable = $metadata->{Joins};      my $joinTable = $metadata->{Joins};
579      my @joinKeys = keys %{$joinTable};      my @joinKeys = keys %{$joinTable};
# Line 506  Line 581 
581          # Separate out the source, the target, and the join clause.          # Separate out the source, the target, and the join clause.
582          $joinKey =~ m!^([^/]+)/(.+)$!;          $joinKey =~ m!^([^/]+)/(.+)$!;
583          my ($sourceRelation, $targetRelation) = ($1, $2);          my ($sourceRelation, $targetRelation) = ($1, $2);
584          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(4);          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(Joins => 4);
585          my $source = $self->ComputeObjectSentence($sourceRelation);          my $source = $self->ComputeObjectSentence($sourceRelation);
586          my $target = $self->ComputeObjectSentence($targetRelation);          my $target = $self->ComputeObjectSentence($targetRelation);
587          my $clause = $joinTable->{$joinKey};          my $clause = $joinTable->{$joinKey};
588          # Display them in a table row.          # Display them in a table row.
589          print HTMLOUT "<tr><td>$source</td><td>$target</td><td>$clause</td></tr>\n";          $retVal .= "<tr><td>$source</td><td>$target</td><td>$clause</td></tr>\n";
590      }      }
591      # Close the table.      # Close the table.
592      print HTMLOUT _CloseTable();      $retVal .= _CloseTable();
593      # Close the document.      Trace("Built MetaData HTML.") if T(3);
594      print HTMLOUT "</body>\n</html>\n";      # Return the HTML.
595      # Close the file.      return $retVal;
     close HTMLOUT;  
     Trace("Built MetaData web page.") if T(3);  
596  }  }
597    
598  =head3 DumpMetaData  =head3 DumpMetaData
599    
600  C<< $database->DumpMetaData(); >>  C<< $erdb->DumpMetaData(); >>
601    
602  Return a dump of the metadata structure.  Return a dump of the metadata structure.
603    
# Line 539  Line 612 
612    
613  =head3 CreateTables  =head3 CreateTables
614    
615  C<< $datanase->CreateTables(); >>  C<< $erdb->CreateTables(); >>
616    
617  This method creates the tables for the database from the metadata structure loaded by the  This method creates the tables for the database from the metadata structure loaded by the
618  constructor. It is expected this function will only be used on rare occasions, when the  constructor. It is expected this function will only be used on rare occasions, when the
# Line 551  Line 624 
624  sub CreateTables {  sub CreateTables {
625      # Get the parameters.      # Get the parameters.
626      my ($self) = @_;      my ($self) = @_;
627      my $metadata = $self->{_metaData};      # Get the relation names.
628      my $dbh = $self->{_dbh};      my @relNames = $self->GetTableNames();
629      # Loop through the entities.      # Loop through the relations.
630      my $entityHash = $metadata->{Entities};      for my $relationName (@relNames) {
     for my $entityName (keys %{$entityHash}) {  
         my $entityData = $entityHash->{$entityName};  
         # Tell the user what we're doing.  
         Trace("Creating relations for entity $entityName.") if T(1);  
         # Loop through the entity's relations.  
         for my $relationName (keys %{$entityData->{Relations}}) {  
631              # Create a table for this relation.              # Create a table for this relation.
632              $self->CreateTable($relationName);              $self->CreateTable($relationName);
633              Trace("Relation $relationName created.") if T(1);          Trace("Relation $relationName created.") if T(2);
         }  
     }  
     # Loop through the relationships.  
     my $relationshipTable = $metadata->{Relationships};  
     for my $relationshipName (keys %{$metadata->{Relationships}}) {  
         # Create a table for this relationship.  
         Trace("Creating relationship $relationshipName.") if T(1);  
         $self->CreateTable($relationshipName);  
634      }      }
635  }  }
636    
637  =head3 CreateTable  =head3 CreateTable
638    
639  C<< $database->CreateTable($tableName, $indexFlag); >>  C<< $erdb->CreateTable($tableName, $indexFlag, $estimatedRows); >>
640    
641  Create the table for a relation and optionally create its indexes.  Create the table for a relation and optionally create its indexes.
642    
# Line 587  Line 646 
646    
647  Name of the relation (which will also be the table name).  Name of the relation (which will also be the table name).
648    
649  =item $indexFlag  =item indexFlag
650    
651  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,
652  L</CreateIndexes> must be called later to bring the indexes into existence.  L</CreateIndexes> must be called later to bring the indexes into existence.
653    
654    =item estimatedRows (optional)
655    
656    If specified, the estimated maximum number of rows for the relation. This
657    information allows the creation of tables using storage engines that are
658    faster but require size estimates, such as MyISAM.
659    
660  =back  =back
661    
662  =cut  =cut
663    
664  sub CreateTable {  sub CreateTable {
665      # Get the parameters.      # Get the parameters.
666      my ($self, $relationName, $indexFlag) = @_;      my ($self, $relationName, $indexFlag, $estimatedRows) = @_;
667      # Get the database handle.      # Get the database handle.
668      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
669      # Get the relation data and determine whether or not the relation is primary.      # Get the relation data and determine whether or not the relation is primary.
# Line 622  Line 687 
687      # Insure the table is not already there.      # Insure the table is not already there.
688      $dbh->drop_table(tbl => $relationName);      $dbh->drop_table(tbl => $relationName);
689      Trace("Table $relationName dropped.") if T(2);      Trace("Table $relationName dropped.") if T(2);
690        # If there are estimated rows, create an estimate so we can take advantage of
691        # faster DB technologies.
692        my $estimation = undef;
693        if ($estimatedRows) {
694            $estimation = [$self->EstimateRowSize($relationName), $estimatedRows];
695        }
696      # Create the table.      # Create the table.
697      Trace("Creating table $relationName: $fieldThing") if T(2);      Trace("Creating table $relationName: $fieldThing") if T(2);
698      $dbh->create_table(tbl => $relationName, flds => $fieldThing);      $dbh->create_table(tbl => $relationName, flds => $fieldThing, estimates => $estimation);
699      Trace("Relation $relationName created in database.") if T(2);      Trace("Relation $relationName created in database.") if T(2);
700      # If we want to build the indexes, we do it here.      # If we want to build the indexes, we do it here. Note that the full-text search
701        # index will not be built until the table has been loaded.
702      if ($indexFlag) {      if ($indexFlag) {
703          $self->CreateIndex($relationName);          $self->CreateIndex($relationName);
704      }      }
705  }  }
706    
707    =head3 VerifyFields
708    
709    C<< my $count = $erdb->VerifyFields($relName, \@fieldList); >>
710    
711    Run through the list of proposed field values, ensuring that all the character fields are
712    below the maximum length. If any fields are too long, they will be truncated in place.
713    
714    =over 4
715    
716    =item relName
717    
718    Name of the relation for which the specified fields are destined.
719    
720    =item fieldList
721    
722    Reference to a list, in order, of the fields to be put into the relation.
723    
724    =item RETURN
725    
726    Returns the number of fields truncated.
727    
728    =back
729    
730    =cut
731    
732    sub VerifyFields {
733        # Get the parameters.
734        my ($self, $relName, $fieldList) = @_;
735        # Initialize the return value.
736        my $retVal = 0;
737        # Get the relation definition.
738        my $relData = $self->_FindRelation($relName);
739        # Get the list of field descriptors.
740        my $fieldTypes = $relData->{Fields};
741        my $fieldCount = scalar @{$fieldTypes};
742        # Loop through the two lists.
743        for (my $i = 0; $i < $fieldCount; $i++) {
744            # Get the type of the current field.
745            my $fieldType = $fieldTypes->[$i]->{type};
746            # If it's a character field, verify the length.
747            if ($fieldType =~ /string/) {
748                my $maxLen = $TypeTable{$fieldType}->{maxLen};
749                my $oldString = $fieldList->[$i];
750                if (length($oldString) > $maxLen) {
751                    # Here it's too big, so we truncate it.
752                    Trace("Truncating field $i in relation $relName to $maxLen characters from \"$oldString\".") if T(1);
753                    $fieldList->[$i] = substr $oldString, 0, $maxLen;
754                    $retVal++;
755                }
756            }
757        }
758        # Return the truncation count.
759        return $retVal;
760    }
761    
762    =head3 DigestFields
763    
764    C<< $erdb->DigestFields($relName, $fieldList); >>
765    
766    Digest the strings in the field list that correspond to data type C<hash-string> in the
767    specified relation.
768    
769    =over 4
770    
771    =item relName
772    
773    Name of the relation to which the fields belong.
774    
775    =item fieldList
776    
777    List of field contents to be loaded into the relation.
778    
779    =back
780    
781    =cut
782    #: Return Type ;
783    sub DigestFields {
784        # Get the parameters.
785        my ($self, $relName, $fieldList) = @_;
786        # Get the relation definition.
787        my $relData = $self->_FindRelation($relName);
788        # Get the list of field descriptors.
789        my $fieldTypes = $relData->{Fields};
790        my $fieldCount = scalar @{$fieldTypes};
791        # Loop through the two lists.
792        for (my $i = 0; $i < $fieldCount; $i++) {
793            # Get the type of the current field.
794            my $fieldType = $fieldTypes->[$i]->{type};
795            # If it's a hash string, digest it in place.
796            if ($fieldType eq 'hash-string') {
797                $fieldList->[$i] = $self->DigestKey($fieldList->[$i]);
798            }
799        }
800    }
801    
802    =head3 DigestKey
803    
804    C<< my $digested = $erdb->DigestKey($keyValue); >>
805    
806    Return the digested value of a symbolic key. The digested value can then be plugged into a
807    key-based search into a table with key-type hash-string.
808    
809    Currently the digesting process is independent of the database structure, but that may not
810    always be the case, so this is an instance method instead of a static method.
811    
812    =over 4
813    
814    =item keyValue
815    
816    Key value to digest.
817    
818    =item RETURN
819    
820    Digested value of the key.
821    
822    =back
823    
824    =cut
825    
826    sub DigestKey {
827        # Unpack the invocant and the key to digest; $self is accepted but not used in this body.
828        my ($self, $keyValue) = @_;
829        # Digest the key with md5_base64 (imported from Digest::MD5 at the top of the file).
830        my $retVal = md5_base64($keyValue);
831        # Hand the digested key back to the caller.
832        return $retVal;
833    }
834    
835  =head3 CreateIndex  =head3 CreateIndex
836    
837  C<< $database->CreateIndex($relationName); >>  C<< $erdb->CreateIndex($relationName); >>
838    
839  Create the indexes for a relation. If a table is being loaded from a large source file (as  Create the indexes for a relation. If a table is being loaded from a large source file (as
840  is the case in L</LoadTable>), it is sometimes best to create the indexes after the load.  is the case in L</LoadTable>), it is sometimes best to create the indexes after the load.
# Line 658  Line 858 
858          my @fieldList = _FixNames(@{$indexData->{IndexFields}});          my @fieldList = _FixNames(@{$indexData->{IndexFields}});
859          my $flds = join(', ', @fieldList);          my $flds = join(', ', @fieldList);
860          # Get the index's uniqueness flag.          # Get the index's uniqueness flag.
861          my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');          my $unique = (exists $indexData->{Unique} ? 'unique' : undef);
862          # Create the index.          # Create the index.
863          $dbh->create_index(idx => $indexName, tbl => $relationName, flds => $flds, unique => $unique);          my $rv = $dbh->create_index(idx => $indexName, tbl => $relationName,
864                                        flds => $flds, kind => $unique);
865            if ($rv) {
866          Trace("Index created: $indexName for $relationName ($flds)") if T(1);          Trace("Index created: $indexName for $relationName ($flds)") if T(1);
867            } else {
868                Confess("Error creating index $indexName for $relationName using ($flds): " . $dbh->error_message());
869            }
870      }      }
871  }  }
872    
873  =head3 LoadTables  =head3 LoadTables
874    
875  C<< my $stats = $database->LoadTables($directoryName, $rebuild); >>  C<< my $stats = $erdb->LoadTables($directoryName, $rebuild); >>
876    
877  This method will load the database tables from a directory. The tables must already have been created  This method will load the database tables from a directory. The tables must already have been created
878  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;
# Line 710  Line 915 
915      $directoryName =~ s!/\\$!!;      $directoryName =~ s!/\\$!!;
916      # Declare the return variable.      # Declare the return variable.
917      my $retVal = Stats->new();      my $retVal = Stats->new();
918      # Get the metadata structure.      # Get the relation names.
919      my $metaData = $self->{_metaData};      my @relNames = $self->GetTableNames();
920      # Loop through the entities.      for my $relationName (@relNames) {
     for my $entity (values %{$metaData->{Entities}}) {  
         # Loop through the entity's relations.  
         for my $relationName (keys %{$entity->{Relations}}) {  
921              # Try to load this relation.              # Try to load this relation.
922              my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);              my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);
923              # Accumulate the statistics.              # Accumulate the statistics.
924              $retVal->Accumulate($result);              $retVal->Accumulate($result);
925          }          }
     }  
     # Loop through the relationships.  
     for my $relationshipName (keys %{$metaData->{Relationships}}) {  
         # Try to load this relationship's relation.  
         my $result = $self->_LoadRelation($directoryName, $relationshipName, $rebuild);  
         # Accumulate the statistics.  
         $retVal->Accumulate($result);  
     }  
926      # Add the duration of the load to the statistical object.      # Add the duration of the load to the statistical object.
927      $retVal->Add('duration', gettimeofday - $startTime);      $retVal->Add('duration', gettimeofday - $startTime);
928      # Return the accumulated statistics.      # Return the accumulated statistics.
929      return $retVal;      return $retVal;
930  }  }
931    
932    
933  =head3 GetTableNames  =head3 GetTableNames
934    
935  C<< my @names = $database->GetTableNames; >>  C<< my @names = $erdb->GetTableNames; >>
936    
937  Return a list of the relations required to implement this database.  Return a list of the relations required to implement this database.
938    
# Line 754  Line 949 
949    
950  =head3 GetEntityTypes  =head3 GetEntityTypes
951    
952  C<< my @names = $database->GetEntityTypes; >>  C<< my @names = $erdb->GetEntityTypes; >>
953    
954  Return a list of the entity type names.  Return a list of the entity type names.
955    
# Line 769  Line 964 
964      return sort keys %{$entityList};      return sort keys %{$entityList};
965  }  }
966    
967    =head3 IsEntity
968    
969    C<< my $flag = $erdb->IsEntity($entityName); >>
970    
971    Return TRUE if the parameter is an entity name, else FALSE.
972    
973    =over 4
974    
975    =item entityName
976    
977    Object name to be tested.
978    
979    =item RETURN
980    
981    Returns TRUE if the specified string is an entity name, else FALSE.
982    
983    =back
984    
985    =cut
986    
987    sub IsEntity {
988        # Unpack the invocant and the name to be tested.
989        my ($self, $entityName) = @_;
990        # The name is an entity exactly when it is a key of the metadata's Entities hash; exists() tests the key without adding an entry for it.
991        return exists $self->{_metaData}->{Entities}->{$entityName};
992    }
993    
994  =head3 Get  =head3 Get
995    
996  C<< my $query = $database->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my $query = $erdb->Get(\@objectNames, $filterClause, \@params); >>
997    
998  This method returns a query object for entities of a specified type using a specified filter.  This method returns a query object for entities of a specified type using a specified filter.
999  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each
# Line 779  Line 1001 
1001  following call requests all B<Genome> objects for the genus specified in the variable  following call requests all B<Genome> objects for the genus specified in the variable
1002  $genus.  $genus.
1003    
1004  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", [$genus]); >>
1005    
1006  The WHERE clause contains a single question mark, so there is a single additional  The WHERE clause contains a single question mark, so there is a single additional
1007  parameter representing the parameter value. It would also be possible to code  parameter representing the parameter value. It would also be possible to code
1008    
1009  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>
1010    
1011  however, this version of the call would generate a syntax error if there were any quote  however, this version of the call would generate a syntax error if there were any quote
1012  characters inside the variable C<$genus>.  characters inside the variable C<$genus>.
# Line 796  Line 1018 
1018  It is possible to specify multiple entity and relationship names in order to retrieve more than  It is possible to specify multiple entity and relationship names in order to retrieve more than
1019  one object's data at the same time, which allows highly complex joined queries. For example,  one object's data at the same time, which allows highly complex joined queries. For example,
1020    
1021  C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", [$genus]); >>
1022    
1023  If multiple names are specified, then the query processor will automatically determine a  If multiple names are specified, then the query processor will automatically determine a
1024  join path between the entities and relationships. The algorithm used is very simplistic.  join path between the entities and relationships. The algorithm used is very simplistic.
1025  In particular, you can't specify any entity or relationship more than once, and if a  In particular, if a relationship is recursive, the path is determined by the order in which
1026  relationship is recursive, the path is determined by the order in which the entity  the entity and the relationship appear. For example, consider a recursive relationship
1027  and the relationship appear. For example, consider a recursive relationship B<IsParentOf>  B<IsParentOf> which relates B<People> objects to other B<People> objects. If the join path is
 which relates B<People> objects to other B<People> objects. If the join path is  
1028  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,
1029  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.
1030    
1031    If an entity or relationship is mentioned twice, the name for the second occurrence will
1032    be suffixed with C<2>, the third occurrence will be suffixed with C<3>, and so forth. So,
1033    for example, if we have C<['Feature', 'HasContig', 'Contig', 'HasContig']>, then the
1034    B<to-link> field of the first B<HasContig> is specified as C<HasContig(to-link)>, while
1035    the B<to-link> field of the second B<HasContig> is specified as C<HasContig2(to-link)>.
1036    
1037  =over 4  =over 4
1038    
1039  =item objectNames  =item objectNames
# Line 829  Line 1056 
1056    
1057  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>
1058    
1059    Note that the case is important. Only an uppercase "ORDER BY" with a single space will
1060    be processed. The idea is to make it less likely to find the verb by accident.
1061    
1062  The rules for field references in a sort order are the same as those for field references in the  The rules for field references in a sort order are the same as those for field references in the
1063  filter clause in general; however, odd things may happen if a sort field is from a secondary  filter clause in general; however, odd things may happen if a sort field is from a secondary
1064  relation.  relation.
1065    
1066  =item param1, param2, ..., paramN  Finally, you can limit the number of rows returned by adding a LIMIT clause. The LIMIT must
1067    be the last thing in the filter clause, and it contains only the word "LIMIT" followed by
1068    a positive number. So, for example
1069    
1070    C<< "Genome(genus) = ? ORDER BY Genome(species) LIMIT 10" >>
1071    
1072    will only return the first ten genomes for the specified genus. The ORDER BY clause is not
1073    required. For example, to just get the first 10 genomes in the B<Genome> table, you could
1074    use
1075    
1076  Parameter values to be substituted into the filter clause.  C<< "LIMIT 10" >>
1077    
1078    =item params
1079    
1080    Reference to a list of parameter values to be substituted into the filter clause.
1081    
1082  =item RETURN  =item RETURN
1083    
# Line 847  Line 1089 
1089    
1090  sub Get {  sub Get {
1091      # Get the parameters.      # Get the parameters.
1092      my ($self, $objectNames, $filterClause, @params) = @_;      my ($self, $objectNames, $filterClause, $params) = @_;
1093      # Construct the SELECT statement. The general pattern is      # Process the SQL stuff.
1094      #      my ($suffix, $mappedNameListRef, $mappedNameHashRef) =
1095      # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN          $self->_SetupSQL($objectNames, $filterClause);
1096      #      # Create the query.
1097      my $dbh = $self->{_dbh};      my $command = "SELECT DISTINCT " . join(".*, ", @{$mappedNameListRef}) .
1098      my $command = "SELECT DISTINCT " . join('.*, ', @{$objectNames}) . ".* FROM " .          ".* $suffix";
1099                  join(', ', @{$objectNames});      my $sth = $self->_GetStatementHandle($command, $params);
1100      # Check for a filter clause.      # Now we create the relation map, which enables DBQuery to determine the order, name
1101      if ($filterClause) {      # and mapped name for each object in the query.
1102          # Here we have one, so we convert its field names and add it to the query. First,      my @relationMap = ();
1103          # We create a copy of the filter string we can work with.      for my $mappedName (@{$mappedNameListRef}) {
1104          my $filterString = $filterClause;          push @relationMap, [$mappedName, $mappedNameHashRef->{$mappedName}];
         # Next, we sort the object names by length. This helps protect us from finding  
         # object names inside other object names when we're doing our search and replace.  
         my @sortedNames = sort { length($b) - length($a) } @{$objectNames};  
         # We will also keep a list of conditions to add to the WHERE clause in order to link  
         # entities and relationships as well as primary relations to secondary ones.  
         my @joinWhere = ();  
         # The final preparatory step is to create a hash table of relation names. The  
         # table begins with the relation names already in the SELECT command.  
         my %fromNames = ();  
         for my $objectName (@sortedNames) {  
             $fromNames{$objectName} = 1;  
         }  
         # We are ready to begin. We loop through the object names, replacing each  
         # object name's field references by the corresponding SQL field reference.  
         # Along the way, if we find a secondary relation, we will need to add it  
         # to the FROM clause.  
         for my $objectName (@sortedNames) {  
             # Get the length of the object name plus 2. This is the value we add to the  
             # size of the field name to determine the size of the field reference as a  
             # whole.  
             my $nameLength = 2 + length $objectName;  
             # Get the object's field list.  
             my $fieldList = $self->_GetFieldTable($objectName);  
             # Find the field references for this object.  
             while ($filterString =~ m/$objectName\(([^)]*)\)/g) {  
                 # At this point, $1 contains the field name, and the current position  
                 # is set immediately after the final parenthesis. We pull out the name of  
                 # the field and the position and length of the field reference as a whole.  
                 my $fieldName = $1;  
                 my $len = $nameLength + length $fieldName;  
                 my $pos = pos($filterString) - $len;  
                 # Insure the field exists.  
                 if (!exists $fieldList->{$fieldName}) {  
                     Confess("Field $fieldName not found for object $objectName.");  
                 } else {  
                     # Get the field's relation.  
                     my $relationName = $fieldList->{$fieldName}->{relation};  
                     # Insure the relation is in the FROM clause.  
                     if (!exists $fromNames{$relationName}) {  
                         # Add the relation to the FROM clause.  
                         $command .= ", $relationName";  
                         # Create its join sub-clause.  
                         push @joinWhere, "$objectName.id = $relationName.id";  
                         # Denote we have it available for future fields.  
                         $fromNames{$relationName} = 1;  
                     }  
                     # Form an SQL field reference from the relation name and the field name.  
                     my $sqlReference = "$relationName." . _FixName($fieldName);  
                     # Put it into the filter string in place of the old value.  
                     substr($filterString, $pos, $len) = $sqlReference;  
                     # Reposition the search.  
                     pos $filterString = $pos + length $sqlReference;  
                 }  
             }  
         }  
         # The next step is to join the objects together. We only need to do this if there  
         # is more than one object in the object list. We start with the first object and  
         # run through the objects after it. Note also that we make a safety copy of the  
         # list before running through it.  
         my @objectList = @{$objectNames};  
         my $lastObject = shift @objectList;  
         # Get the join table.  
         my $joinTable = $self->{_metaData}->{Joins};  
         # Loop through the object list.  
         for my $thisObject (@objectList) {  
             # Look for a join.  
             my $joinKey = "$lastObject/$thisObject";  
             if (!exists $joinTable->{$joinKey}) {  
                 # Here there's no join, so we throw an error.  
                 Confess("No join exists to connect from $lastObject to $thisObject.");  
             } else {  
                 # Get the join clause and add it to the WHERE list.  
                 push @joinWhere, $joinTable->{$joinKey};  
                 # Save this object as the last object for the next iteration.  
                 $lastObject = $thisObject;  
             }  
         }  
         # Now we need to handle the whole ORDER BY thing. We'll put the order by clause  
         # in the following variable.  
         my $orderClause = "";  
         # Locate the ORDER BY verb (if any).  
         if ($filterString =~ m/^(.*)ORDER BY/g) {  
             # Here we have an ORDER BY verb. Split it off of the filter string.  
             my $pos = pos $filterString;  
             $orderClause = substr($filterString, $pos);  
             $filterString = $1;  
         }  
         # Add the filter and the join clauses (if any) to the SELECT command.  
         if ($filterString) {  
             push @joinWhere, "($filterString)";  
         }  
         if (@joinWhere) {  
             $command .= " WHERE " . join(' AND ', @joinWhere);  
         }  
         # Add the sort clause (if any) to the SELECT command.  
         if ($orderClause) {  
             $command .= " ORDER BY $orderClause";  
         }  
1105      }      }
     Trace("SQL query: $command") if T(2);  
     Trace("PARMS: '" . (join "', '", @params) . "'") if (T(3) && (@params > 0));  
     my $sth = $dbh->prepare_command($command);  
     # Execute it with the parameters bound in.  
     $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());  
1106      # Return the statement object.      # Return the statement object.
1107      my $retVal = DBQuery::_new($self, $sth, @{$objectNames});      my $retVal = DBQuery::_new($self, $sth, \@relationMap);
1108      return $retVal;      return $retVal;
1109  }  }
1110    
1111  =head3 GetList  =head3 Search
   
 C<< my @dbObjects = $database->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  
1112    
1113  Return a list of object descriptors for the specified objects as determined by the  C<< my $query = $erdb->Search($searchExpression, $idx, \@objectNames, $filterClause, \@params); >>
 specified filter clause.  
1114    
1115  This method is essentially the same as L</Get> except it returns a list of objects rather  Perform a full text search with filtering. The search will be against a specified object
1116  than a query object that can be used to get the results one record at a time.  in the object name list. That object will get an extra field containing the search
1117    relevance. Note that except for the search expression, the parameters of this method are
1118    the same as those for L</Get> and follow the same rules.
1119    
1120  =over 4  =over 4
1121    
1122    =item searchExpression
1123    
1124    Boolean search expression for the text fields of the target object.
1125    
1126    =item idx
1127    
1128    Index in the I<$objectNames> list of the table to be searched in full-text mode.
1129    
1130  =item objectNames  =item objectNames
1131    
1132  List containing the names of the entity and relationship objects to be retrieved.  List containing the names of the entity and relationship objects to be retrieved.
# Line 995  Line 1141 
1141  or secondary entity relations; however, all of the entities and relationships involved must  or secondary entity relations; however, all of the entities and relationships involved must
1142  be included in the list of object names.  be included in the list of object names.
1143    
1144  The filter clause can also specify a sort order. To do this, simply follow the filter string  =item params
 with an ORDER BY clause. For example, the following filter string gets all genomes for a  
 particular genus and sorts them by species name.  
   
 C<< "Genome(genus) = ? ORDER BY Genome(species)" >>  
   
 The rules for field references in a sort order are the same as those for field references in the  
 filter clause in general; however, odd things may happen if a sort field is from a secondary  
 relation.  
   
 =item param1, param2, ..., paramN  
1145    
1146  Parameter values to be substituted into the filter clause.  Reference to a list of parameter values to be substituted into the filter clause.
1147    
1148  =item RETURN  =item RETURN
1149    
1150  Returns a list of B<DBObject>s that satisfy the query conditions.  Returns a query object for the specified search.
1151    
1152  =back  =back
1153    
1154  =cut  =cut
1155  #: Return Type @%  
1156  sub GetList {  sub Search {
1157      # Get the parameters.      # Get the parameters.
1158      my ($self, $objectNames, $filterClause, @params) = @_;      my ($self, $searchExpression, $idx, $objectNames, $filterClause, $params) = @_;
1159      # Declare the return variable.      # Declare the return variable.
1160      my @retVal = ();      my $retVal;
1161      # Perform the query.      # Create a safety copy of the parameter list.
1162      my $query = $self->Get($objectNames, $filterClause, @params);      my @myParams = @{$params};
1163      # Loop through the results.      # Get the first object's structure so we have access to the searchable fields.
1164      while (my $object = $query->Fetch) {      my $object1Name = $objectNames->[$idx];
1165          push @retVal, $object;      my $object1Structure = $self->_GetStructure($object1Name);
1166        # Get the field list.
1167        if (! exists $object1Structure->{searchFields}) {
1168            Confess("No searchable index for $object1Name.");
1169        } else {
1170            # Get the field list.
1171            my @fields = @{$object1Structure->{searchFields}};
1172            # Clean the search expression.
1173            my $actualKeywords = $self->CleanKeywords($searchExpression);
1174            # We need two match expressions, one for the filter clause and one in the
1175            # query itself. Both will use a parameter mark, so we need to push the
1176            # search expression onto the front of the parameter list twice.
1177            unshift @myParams, $actualKeywords, $actualKeywords;
1178            # Build the match expression.
1179            my @matchFilterFields = map { "$object1Name." . _FixName($_) } @fields;
1180            my $matchClause = "MATCH (" . join(", ", @matchFilterFields) . ") AGAINST (? IN BOOLEAN MODE)";
1181            # Process the SQL stuff.
1182            my ($suffix, $mappedNameListRef, $mappedNameHashRef) =
1183                $self->_SetupSQL($objectNames, $filterClause, $matchClause);
1184            # Create the query. Note that the match clause is inserted at the front of
1185            # the select fields.
1186            my $command = "SELECT DISTINCT $matchClause, " . join(".*, ", @{$mappedNameListRef}) .
1187                ".* $suffix";
1188            my $sth = $self->_GetStatementHandle($command, \@myParams);
1189            # Now we create the relation map, which enables DBQuery to determine the order, name
1190            # and mapped name for each object in the query.
1191            my @relationMap = _RelationMap($mappedNameHashRef, $mappedNameListRef);
1192            # Return the statement object.
1193            $retVal = DBQuery::_new($self, $sth, \@relationMap, $object1Name);
1194        }
1195        return $retVal;
1196    }
1197    
1198    =head3 GetFlat
1199    
1200    C<< my @list = $erdb->GetFlat(\@objectNames, $filterClause, \@parameterList, $field); >>
1201    
1202    This is a variation of L</GetAll> that asks for only a single field per record and
1203    returns a single flattened list.
1204    
1205    =over 4
1206    
1207    =item objectNames
1208    
1209    List containing the names of the entity and relationship objects to be retrieved.
1210    
1211    =item filterClause
1212    
1213    WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can
1214    be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form
1215    B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the
1216    parameter list as additional parameters. The fields in a filter clause can come from primary
1217    entity relations, relationship relations, or secondary entity relations; however, all of the
1218    entities and relationships involved must be included in the list of object names.
1219    
1220    =item parameterList
1221    
1222    List of the parameters to be substituted in for the parameter marks in the filter clause.
1223    
1224    =item field
1225    
1226    Name of the field to be used to get the elements of the list returned.
1227    
1228    =item RETURN
1229    
1230    Returns a list of values.
1231    
1232    =back
1233    
1234    =cut
1235    #: Return Type @;
1236    sub GetFlat {
1237        # Unpack the invocant, the object-name list, the filter clause, the parameter-list reference, and the field name.
1238        my ($self, $objectNames, $filterClause, $parameterList, $field) = @_;
1239        # Delegate to Get() to build the query; the parameter list is passed through as a reference.
1240        my $query = $self->Get($objectNames, $filterClause, $parameterList);
1241        # Start with an empty result list.
1242        my @retVal = ();
1243        # Fetch rows until Fetch() returns a false value. Value() is called in list context here, so everything it returns is appended -- presumably what makes the result "flattened"; confirm against DBObject.
1244        while (my $row = $query->Fetch()) {
1245            push @retVal, $row->Value($field);
1246        }
1247        # Return the accumulated values as a plain list (not a reference).
1248        return @retVal;
1249    }
1250    
1251    =head3 Delete
1252    
1253    C<< my $stats = $erdb->Delete($entityName, $objectID, $testFlag); >>
1254    
1255    Delete an entity instance from the database. The instance is deleted along with all entity and
1256    relationship instances dependent on it. The idea of dependence here is recursive. An object is
1257    always dependent on itself. An object is dependent if it is a 1-to-many or many-to-many
1258    relationship connected to a dependent entity or the "to" entity connected to a 1-to-many
1259    dependent relationship.
1260    
1261    =over 4
1262    
1263    =item entityName
1264    
1265    Name of the entity type for the instance being deleted.
1266    
1267    =item objectID
1268    
1269    ID of the entity instance to be deleted. If the ID contains a wild card character (C<%>),
1270    then it is presumed to be a LIKE pattern.
1271    
1272    =item testFlag
1273    
1274    If TRUE, the delete statements will be traced without being executed.
1275    
1276    =item RETURN
1277    
1278    Returns a statistics object indicating how many records of each particular table were
1279    deleted.
1280    
1281    =back
1282    
1283    =cut
1284    #: Return Type $%;
1285    sub Delete {
1286        # Get the parameters.
1287        my ($self, $entityName, $objectID, $testFlag) = @_;
1288        # Declare the return variable.
1289        my $retVal = Stats->new();
1290        # Get the DBKernel object.
1291        my $db = $self->{_dbh};
1292        # We're going to generate all the paths branching out from the starting entity. One of
1293        # the things we have to be careful about is preventing loops. We'll use a hash to
1294        # determine if we've hit a loop.
1295        my %alreadyFound = ();
1296        # These next lists will serve as our result stack. We start by pushing object lists onto
1297        # the stack, and then popping them off to do the deletes. This means the deletes will
1298        # start with the longer paths before getting to the shorter ones. That, in turn, makes
1299        # sure we don't delete records that might be needed to forge relationships back to the
1300        # original item. We have two lists-- one for TO-relationships, and one for
1301        # FROM-relationships and entities.
1302        my @fromPathList = ();
1303        my @toPathList = ();
1304        # This final hash is used to remember what work still needs to be done. We push paths
1305        # onto the list, then pop them off to extend the paths. We prime it with the starting
1306        # point. Note that we will work hard to insure that the last item on a path in the
1307        # TODO list is always an entity.
1308        my @todoList = ([$entityName]);
1309        while (@todoList) {
1310            # Get the current path.
1311            my $current = pop @todoList;
1312            # Copy it into a list.
1313            my @stackedPath = @{$current};
1314            # Pull off the last item on the path. It will always be an entity.
1315            my $entityName = pop @stackedPath;
1316            # Add it to the alreadyFound list.
1317            $alreadyFound{$entityName} = 1;
1318            # Get the entity data.
1319            my $entityData = $self->_GetStructure($entityName);
1320            # The first task is to loop through the entity's relation. A DELETE command will
1321            # be needed for each of them.
1322            my $relations = $entityData->{Relations};
1323            for my $relation (keys %{$relations}) {
1324                my @augmentedList = (@stackedPath, $relation);
1325                push @fromPathList, \@augmentedList;
1326            }
1327            # Now we need to look for relationships connected to this entity.
1328            my $relationshipList = $self->{_metaData}->{Relationships};
1329            for my $relationshipName (keys %{$relationshipList}) {
1330                my $relationship = $relationshipList->{$relationshipName};
1331                # Check the FROM field. We're only interested if it's us.
1332                if ($relationship->{from} eq $entityName) {
1333                    # Add the path to this relationship.
1334                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1335                    push @fromPathList, \@augmentedList;
1336                    # Check the arity. If it's MM we're done. If it's 1M
1337                    # and the target hasn't been seen yet, we want to
1338                    # stack the entity for future processing.
1339                    if ($relationship->{arity} eq '1M') {
1340                        my $toEntity = $relationship->{to};
1341                        if (! exists $alreadyFound{$toEntity}) {
1342                            # Here we have a new entity that's dependent on
1343                            # the current entity, so we need to stack it.
1344                            my @stackList = (@augmentedList, $toEntity);
1345                            push @fromPathList, \@stackList;
1346                        } else {
1347                            Trace("$toEntity ignored because it occurred previously.") if T(4);
1348                        }
1349                    }
1350                }
1351                # Now check the TO field. In this case only the relationship needs
1352                # deletion.
1353                if ($relationship->{to} eq $entityName) {
1354                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1355                    push @toPathList, \@augmentedList;
1356                }
1357            }
1358        }
1359        # Create the first qualifier for the WHERE clause. This selects the
1360        # keys of the primary entity records to be deleted. When we're deleting
1361        # from a dependent table, we construct a join page from the first qualifier
1362        # to the table containing the dependent records to delete.
1363        my $qualifier = ($objectID =~ /%/ ? "LIKE ?" : "= ?");
1364        # We need to make two passes. The first is through the to-list, and
1365        # the second through the from-list. The from-list is second because
1366        # the to-list may need to pass through some of the entities the
1367        # from-list would delete.
1368        my %stackList = ( from_link => \@fromPathList, to_link => \@toPathList );
1369        # Now it's time to do the deletes. We do it in two passes.
1370        for my $keyName ('to_link', 'from_link') {
1371            # Get the list for this key.
1372            my @pathList = @{$stackList{$keyName}};
1373            Trace(scalar(@pathList) . " entries in path list for $keyName.") if T(3);
1374            # Loop through this list.
1375            while (my $path = pop @pathList) {
1376                # Get the table whose rows are to be deleted.
1377                my @pathTables = @{$path};
1378                # Start the DELETE statement. We need to call DBKernel because the
1379                # syntax of a DELETE-USING varies among DBMSs.
1380                my $target = $pathTables[$#pathTables];
1381                my $stmt = $db->SetUsing(@pathTables);
1382                # Now start the WHERE. The first thing is the ID field from the starting table. That
1383                # starting table will either be the entity relation or one of the entity's
1384                # sub-relations.
1385                $stmt .= " WHERE $pathTables[0].id $qualifier";
1386                # Now we run through the remaining entities in the path, connecting them up.
1387                for (my $i = 1; $i <= $#pathTables; $i += 2) {
1388                    # Connect the current relationship to the preceding entity.
1389                    my ($entity, $rel) = @pathTables[$i-1,$i];
1390                    # The style of connection depends on the direction of the relationship.
1391                    $stmt .= " AND $entity.id = $rel.$keyName";
1392                    if ($i + 1 <= $#pathTables) {
1393                        # Here there's a next entity, so connect that to the relationship's
1394                        # to-link.
1395                        my $entity2 = $pathTables[$i+1];
1396                        $stmt .= " AND $rel.to_link = $entity2.id";
1397                    }
1398                }
1399                # Now we have our desired DELETE statement.
1400                if ($testFlag) {
1401                    # Here the user wants to trace without executing.
1402                    Trace($stmt) if T(0);
1403                } else {
1404                    # Here we can delete. Note that the SQL method dies with a confessing
1405                    # if an error occurs, so we just go ahead and do it.
1406                    Trace("Executing delete from $target using '$objectID'.") if T(3);
1407                    my $rv = $db->SQL($stmt, 0, $objectID);
1408                    # Accumulate the statistics for this delete. The only rows deleted
1409                    # are from the target table, so we use its name to record the
1410                    # statistic.
1411                    $retVal->Add($target, $rv);
1412                }
1413            }
1414        }
1415        # Return the result.
1416        return $retVal;
1417    }
1418    
1419    =head3 SortNeeded
1420    
1421    C<< my $parms = $erdb->SortNeeded($relationName); >>
1422    
1423    Return the pipe command for the sort that should be applied to the specified
1424    relation when creating the load file.
1425    
1426    For example, if the load file should be sorted ascending by the first
1427    field, this method would return
1428    
1429        sort -k1 -t"\t"
1430    
1431    If the first field is numeric, the method would return
1432    
1433        sort -k1n -t"\t"
1434    
1435    Unfortunately, due to a bug in the C<sort> command, we cannot eliminate duplicate
1436    keys using a sort.
1437    
1438    =over 4
1439    
1440    =item relationName
1441    
1442    Name of the relation to be examined.
1443    
1444    =item RETURN
1445    
1446    Returns the sort command to use for sorting the relation, suitable for piping.
1447    
1448    =back
1449    
1450    =cut
1451    #: Return Type $;
sub SortNeeded {
    # Build the "sort" pipe command used to order a relation's load file.
    #
    # Parameters:
    #   $self          ERDB object.
    #   $relationName  name of the relation whose load file is to be sorted.
    # Returns the sort command as a string, one -k option per key field.
    # Confesses if a key field cannot be found in the relation's field list.
    my ($self, $relationName) = @_;
    # Get the relation structure. It supplies both the field list and the
    # index definitions.
    my $relationData = $self->_FindRelation($relationName);
    # Decide which fields make up the sort key. A relationship is sorted on
    # the fields of its FROM index; primary and secondary entity relations
    # are both sorted on the ID field. The entity table is checked first so
    # that a name found in both tables is treated as an entity.
    my @keyNames = ();
    my $metaData = $self->{_metaData};
    if (! exists $metaData->{Entities}->{$relationName} &&
        exists $metaData->{Relationships}->{$relationName}) {
        my $index = $relationData->{Indexes}->{"idx${relationName}From"};
        push @keyNames, @{$index->{IndexFields}};
    } else {
        push @keyNames, "id";
    }
    # Prime the return string. The load files are tab-delimited.
    my $retVal = "sort -t\"\t\" ";
    # Get the relation's field list.
    my @fields = @{$relationData->{Fields}};
    # Convert each key into a -k option.
    for my $keyData (@keyNames) {
        # An index field may carry a DESC suffix; split it off if present.
        my ($keyName, $descending) = ($keyData, 0);
        if ($keyData =~ /^([^ ]+) DESC/) {
            ($keyName, $descending) = ($1, 1);
        }
        # Find the key's position and type. The search stops at the first
        # field with a matching name.
        my $fieldSpec;
        for (my $i = 0; $i <= $#fields && ! defined $fieldSpec; $i++) {
            my $thisField = $fields[$i];
            if ($thisField->{name} eq $keyName) {
                # The modifier decides whether we're using a character,
                # numeric, or floating-point sort.
                my $modifier = $TypeTable{$thisField->{type}}->{sort};
                # A descending index field reverses the sort on this key.
                if ($descending) {
                    $modifier .= "r";
                }
                # Field numbers are 1-based in the sort command.
                $fieldSpec = ($i + 1) . $modifier;
            }
        }
        if (! defined $fieldSpec) {
            # Without this check a missing key would append a dangling
            # " -k" and produce a malformed sort command.
            Confess("Sort key \"$keyName\" not found in relation $relationName.");
        } else {
            # Add this field to the sort command.
            $retVal .= " -k$fieldSpec";
        }
    }
    # Return the result.
    return $retVal;
}
1516    
1517    =head3 GetList
1518    
1519    C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, \@params); >>
1520    
1521    Return a list of object descriptors for the specified objects as determined by the
1522    specified filter clause.
1523    
1524    This method is essentially the same as L</Get> except it returns a list of objects rather
1525    than a query object that can be used to get the results one record at a time.
1526    
1527    =over 4
1528    
1529    =item objectNames
1530    
1531    List containing the names of the entity and relationship objects to be retrieved.
1532    
1533    =item filterClause
1534    
1535    WHERE clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can
1536    be parameterized with parameter markers (C<?>). Each field used in the WHERE clause must be
1537    specified in the standard form B<I<objectName>(I<fieldName>)>. Any parameters specified
1538    in the filter clause should be added to the parameter list as additional parameters. The
1539    fields in a filter clause can come from primary entity relations, relationship relations,
1540    or secondary entity relations; however, all of the entities and relationships involved must
1541    be included in the list of object names.
1542    
1543    The filter clause can also specify a sort order. To do this, simply follow the filter string
1544    with an ORDER BY clause. For example, the following filter string gets all genomes for a
1545    particular genus and sorts them by species name.
1546    
1547    C<< "Genome(genus) = ? ORDER BY Genome(species)" >>
1548    
1549    The rules for field references in a sort order are the same as those for field references in the
1550    filter clause in general; however, odd things may happen if a sort field is from a secondary
1551    relation.
1552    
1553    =item params
1554    
1555    Reference to a list of parameter values to be substituted into the filter clause.
1556    
1557    =item RETURN
1558    
1559    Returns a list of B<DBObject>s that satisfy the query conditions.
1560    
1561    =back
1562    
1563    =cut
1564    #: Return Type @%
sub GetList {
    # Run a query and return all of its results at once instead of a
    # query object. The arguments are passed to Get unchanged.
    my ($self, $objectNames, $filterClause, $params) = @_;
    # This list accumulates the objects found.
    my @found;
    # Start the query.
    my $cursor = $self->Get($objectNames, $filterClause, $params);
    # Drain the query. Fetch returns a false value when the results run out.
    while (1) {
        my $row = $cursor->Fetch;
        last if ! $row;
        push @found, $row;
    }
    # Hand back everything we collected.
    return @found;
}
1579    
1580    =head3 GetCount
1581    
1582    C<< my $count = $erdb->GetCount(\@objectNames, $filter, \@params); >>
1583    
1584    Return the number of rows found by a specified query. This method would
1585    normally be used to count the records in a single table. For example, in a
1586    genetics database
1587    
1588        my $count = $erdb->GetCount(['Genome'], 'Genome(genus-species) LIKE ?', ['homo %']);
1589    
1590    would return the number of genomes for the genus I<homo>. It is conceivable, however,
1591    to use it to return records based on a join. For example,
1592    
1593        my $count = $erdb->GetCount(['HasFeature', 'Genome'], 'Genome(genus-species) LIKE ?',
1594                                    ['homo %']);
1595    
1596    would return the number of features for genomes in the genus I<homo>. Note that
1597    only the rows from the first table are counted. If the above command were
1598    
1599        my $count = $erdb->GetCount(['Genome', 'Feature'], 'Genome(genus-species) LIKE ?',
1600                                    ['homo %']);
1601    
1602    it would return the number of genomes, not the number of genome/feature pairs.
1603    
1604    =over 4
1605    
1606    =item objectNames
1607    
1608    Reference to a list of the objects (entities and relationships) included in the
1609    query.
1610    
1611    =item filter
1612    
1613    A filter clause for restricting the query. The rules are the same as for the L</Get>
1614    method.
1615    
1616    =item params
1617    
1618    Reference to a list of the parameter values to be substituted for the parameter marks
1619    in the filter.
1620    
1621    =item RETURN
1622    
1623    Returns a count of the number of records in the first table that would satisfy
1624    the query.
1625    
1626    =back
1627    
1628    =cut
1629    
sub GetCount {
    # Count the rows of the first named object that satisfy a query.
    my ($self, $objectNames, $filter, $params) = @_;
    # Default the parameter list to empty if the caller left it off.
    $params = [] if ! defined($params);
    # Pick the field to count. Entities are counted by ID. Relationships
    # are counted by to-link, already in its SQL form (to_link).
    my $countedField = ($self->IsEntity($objectNames->[0]) ? "id" : "to_link");
    # Build the FROM/WHERE portion of the statement. Only the suffix and
    # the mapped name list are needed here.
    my ($suffix, $mappedNameListRef) = $self->_SetupSQL($objectNames, $filter);
    # Prefix the suffix with a request for a count on the first object.
    my $firstObject = $mappedNameListRef->[0];
    my $command = "SELECT COUNT($firstObject.$countedField) $suffix";
    # Prepare and execute the command, then pull out the single value.
    my $sth = $self->_GetStatementHandle($command, $params);
    my ($retVal) = $sth->fetchrow_array();
    # An undefined count means either an SQL error or an empty result.
    if (! defined($retVal)) {
        if (! $sth->err) {
            # Here we have no result.
            Confess("No result attempting to retrieve row count.");
        } else {
            # Here we had an SQL error.
            Confess("Error retrieving row count: " . $sth->errstr());
        }
    }
    # Return the count.
    return $retVal;
}
1672    
1673  =head3 ComputeObjectSentence  =head3 ComputeObjectSentence
1674    
1675  C<< my $sentence = $database->ComputeObjectSentence($objectName); >>  C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >>
1676    
1677  Check an object name, and if it is a relationship convert it to a relationship sentence.  Check an object name, and if it is a relationship convert it to a relationship sentence.
1678    
# Line 1069  Line 1707 
1707    
1708  =head3 DumpRelations  =head3 DumpRelations
1709    
1710  C<< $database->DumpRelations($outputDirectory); >>  C<< $erdb->DumpRelations($outputDirectory); >>
1711    
1712  Write the contents of all the relations to tab-delimited files in the specified directory.  Write the contents of all the relations to tab-delimited files in the specified directory.
1713  Each file will have the same name as the relation dumped, with an extension of DTX.  Each file will have the same name as the relation dumped, with an extension of DTX.
# Line 1109  Line 1747 
1747      }      }
1748  }  }
1749    
1750    =head3 InsertValue
1751    
1752    C<< $erdb->InsertValue($entityID, $fieldName, $value); >>
1753    
1754    This method will insert a new value into the database. The value must be one
1755    associated with a secondary relation, since primary values cannot be inserted:
1756    they occur exactly once. Secondary values, on the other hand, can be missing
1757    or multiply-occurring.
1758    
1759    =over 4
1760    
1761    =item entityID
1762    
1763    ID of the object that is to receive the new value.
1764    
1765    =item fieldName
1766    
1767    Field name for the new value-- this includes the entity name, since
1768    field names are of the format I<objectName>C<(>I<fieldName>C<)>.
1769    
1770    =item value
1771    
1772    New value to be put in the field.
1773    
1774    =back
1775    
1776    =cut
1777    
sub InsertValue {
    # Insert a value for a secondary-relation field of an existing entity.
    # $fieldName has the form objectName(fieldName); $entityID is the ID of
    # the entity instance that receives $value.
    my ($self, $entityID, $fieldName, $value) = @_;
    # Split the field name into the entity name and the real field name.
    # The captures come back empty if the name is not in the standard form.
    my ($entityName, $fieldTitle) = ($fieldName =~ /^([^(]+)\(([^)]+)\)/);
    if (! defined $entityName) {
        Confess("$fieldName is not a valid field name.");
    } elsif (! $self->IsEntity($entityName)) {
        Confess("$entityName is not a valid entity.");
    } else {
        # Look up the field in the entity's field table.
        my $fieldHash = $self->{_metaData}->{Entities}->{$entityName}->{Fields};
        if (! exists $fieldHash->{$fieldTitle}) {
            Confess("$fieldTitle not found in $entityName.");
        } elsif ($fieldHash->{$fieldTitle}->{relation} eq $entityName) {
            # A field stored in the entity's own relation is a primary
            # field, and cannot be inserted separately.
            Confess("Cannot do InsertValue on primary field $fieldTitle of $entityName.");
        } else {
            # The field lives in a secondary relation, so we can insert a
            # new row keyed by the entity ID.
            my $relation = $fieldHash->{$fieldTitle}->{relation};
            my $dbh = $self->{_dbh};
            my $fixedName = _FixName($fieldTitle);
            my $statement = "INSERT INTO $relation (id, $fixedName) VALUES(?, ?)";
            # Execute the command.
            $dbh->SQL($statement, 0, $entityID, $value);
        }
    }
}
1812    
1813  =head3 InsertObject  =head3 InsertObject
1814    
1815  C<< my $ok = $database->InsertObject($objectType, \%fieldHash); >>  C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >>
1816    
1817  Insert an object into the database. The object is defined by a type name and then a hash  Insert an object into the database. The object is defined by a type name and then a hash
1818  of field names to values. Field values in the primary relation are represented by scalars.  of field names to values. Field values in the primary relation are represented by scalars.
# Line 1120  Line 1821 
1821  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases
1822  C<ZP_00210270.1> and C<gi|46206278>.  C<ZP_00210270.1> and C<gi|46206278>.
1823    
1824  C<< $database->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>  C<< $erdb->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>
1825    
1826  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and
1827  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.
1828    
1829  C<< $database->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>  C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence => 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>
1830    
1831  =over 4  =over 4
1832    
# Line 1250  Line 1951 
1951    
1952  =head3 LoadTable  =head3 LoadTable
1953    
1954  C<< my %results = $database->LoadTable($fileName, $relationName, $truncateFlag); >>  C<< my %results = $erdb->LoadTable($fileName, $relationName, $truncateFlag); >>
1955    
1956  Load data from a tab-delimited file into a specified table, optionally re-creating the table  Load data from a tab-delimited file into a specified table, optionally re-creating the table
1957  first.  first.
# Line 1271  Line 1972 
1972    
1973  =item RETURN  =item RETURN
1974    
1975  Returns a statistical object containing the number of records read and a list of  Returns a statistical object containing a list of the error messages.
 the error messages.  
1976    
1977  =back  =back
1978    
# Line 1286  Line 1986 
1986      Trace("Loading table $relationName from $fileName") if T(2);      Trace("Loading table $relationName from $fileName") if T(2);
1987      # Get the database handle.      # Get the database handle.
1988      my $dbh = $self->{_dbh};      my $dbh = $self->{_dbh};
1989        # Get the input file size.
1990        my $fileSize = -s $fileName;
1991      # Get the relation data.      # Get the relation data.
1992      my $relation = $self->_FindRelation($relationName);      my $relation = $self->_FindRelation($relationName);
1993      # Check the truncation flag.      # Check the truncation flag.
1994      if ($truncateFlag) {      if ($truncateFlag) {
1995          Trace("Creating table $relationName") if T(2);          Trace("Creating table $relationName") if T(2);
1996            # Compute the row count estimate. We take the size of the load file,
1997            # divide it by the estimated row size, and then multiply by 1.5 to
1998            # leave extra room. We postulate a minimum row count of 1000 to
1999            # prevent problems with incoming empty load files.
2000            my $rowSize = $self->EstimateRowSize($relationName);
2001            my $estimate = FIG::max($fileSize * 1.5 / $rowSize, 1000);
2002          # Re-create the table without its index.          # Re-create the table without its index.
2003          $self->CreateTable($relationName, 0);          $self->CreateTable($relationName, 0, $estimate);
2004          # If this is a pre-index DBMS, create the index here.          # If this is a pre-index DBMS, create the index here.
2005          if ($dbh->{_preIndex}) {          if ($dbh->{_preIndex}) {
2006              eval {              eval {
# Line 1303  Line 2011 
2011              }              }
2012          }          }
2013      }      }
     # Determine whether or not this is a primary relation. Primary relations have an extra  
     # field indicating whether or not a given object is new or was loaded from the flat files.  
     my $primary = $self->_IsPrimary($relationName);  
     # Get the number of fields in this relation.  
     my @fieldList = @{$relation->{Fields}};  
     my $fieldCount = @fieldList;  
     # Start a database transaction.  
     $dbh->begin_tran;  
     # Open the relation file. We need to create a cleaned-up copy before loading.  
     open TABLEIN, '<', $fileName;  
     my $tempName = "$fileName.tbl";  
     open TABLEOUT, '>', $tempName;  
     my $inputCount = 0;  
     # Loop through the file.  
     while (<TABLEIN>) {  
         $inputCount++;  
         # Chop off the new-line character.  
         my $record = Tracer::Strip($_);  
         # Only proceed if the record is non-blank.  
         if ($record) {  
             # Escape all the backslashes found in the line.  
             $record =~ s/\\/\\\\/g;  
             # Insure the number of fields is correct.  
             my @fields = split /\t/, $record;  
             while (@fields > $fieldCount) {  
                 my $extraField = $fields[$#fields];  
                 delete $fields[$#fields];  
                 if ($extraField) {  
                     Trace("Nonblank extra field value \"$extraField\" deleted from record $inputCount of $fileName.") if T(1);  
                 }  
             }  
             while (@fields < $fieldCount) {  
                 push @fields, "";  
             }  
             # If this is a primary relation, add a 0 for the new-record flag (indicating that  
             # this record is not new, but part of the original load).  
             if ($primary) {  
                 push @fields, "0";  
             }  
             # Write the record.  
             $record = join "\t", @fields;  
             print TABLEOUT "$record\n";  
             # Count the record written.  
             my $count = $retVal->Add('records');  
             my $len = length $record;  
             Trace("Record $count written with $len characters.") if T(4);  
         } else {  
             # Here we have a blank record.  
             $retVal->Add('skipped');  
         }  
     }  
     # Close the files.  
     close TABLEIN;  
     close TABLEOUT;  
     Trace("Temporary file $tempName created.") if T(2);  
2014      # Load the table.      # Load the table.
2015      my $rv;      my $rv;
2016      eval {      eval {
2017          $rv = $dbh->load_table(file => $tempName, tbl => $relationName);          $rv = $dbh->load_table(file => $fileName, tbl => $relationName);
2018      };      };
2019      if (!defined $rv) {      if (!defined $rv) {
2020          $retVal->AddMessage($@) if ($@);          $retVal->AddMessage($@) if ($@);
2021          $retVal->AddMessage("Table load failed for $relationName using $tempName.");          $retVal->AddMessage("Table load failed for $relationName using $fileName.");
2022          Trace("Table load failed for $relationName.") if T(1);          Trace("Table load failed for $relationName.") if T(1);
2023      } else {      } else {
2024          # Here we successfully loaded the table. Trace the number of records loaded.          # Here we successfully loaded the table.
2025          Trace("$retVal->{records} records read for $relationName.") if T(2);          $retVal->Add("tables");
2026            my $size = -s $fileName;
2027            Trace("$size bytes loaded into $relationName.") if T(2);
2028          # If we're rebuilding, we need to create the table indexes.          # If we're rebuilding, we need to create the table indexes.
2029          if ($truncateFlag && ! $dbh->{_preIndex}) {          if ($truncateFlag) {
2030                # Indexes are created here for PostGres. For PostGres, indexes are
2031                # best built at the end. For MySQL, the reverse is true.
2032                if (! $dbh->{_preIndex}) {
2033              eval {              eval {
2034                  $self->CreateIndex($relationName);                  $self->CreateIndex($relationName);
2035              };              };
# Line 1379  Line 2037 
2037                  $retVal->AddMessage($@);                  $retVal->AddMessage($@);
2038              }              }
2039          }          }
2040          # Analyze the table to help optimize tables.              # The full-text index (if any) is always built last, even for MySQL.
2041          $dbh->vacuum_it($relationName);              # First we need to see if this table has a full-text index. Only
2042                # primary relations are allowed that privilege.
2043                if ($self->_IsPrimary($relationName)) {
2044                    # Get the relation's entity/relationship structure.
2045                    my $structure = $self->_GetStructure($relationName);
2046                    # Check for a searchable fields list.
2047                    if (exists $structure->{searchFields}) {
2048                        # Here we know that we need to create a full-text search index.
2049                        # Get an SQL-formatted field name list.
2050                        my $fields = join(", ", $self->_FixNames(@{$structure->{searchFields}}));
2051                        # Create the index.
2052                        $dbh->create_index(tbl => $relationName, idx => "search_idx_$relationName",
2053                                           flds => $fields, kind => 'fulltext');
2054                    }
2055                }
2056            }
2057      }      }
2058      # Commit the database changes.      # Analyze the table to improve performance.
2059      $dbh->commit_tran;      Trace("Analyzing and compacting $relationName.") if T(3);
2060      # Delete the temporary file.      $dbh->vacuum_it($relationName);
2061      unlink $tempName;      Trace("$relationName load completed.") if T(3);
2062      # Return the statistics.      # Return the statistics.
2063      return $retVal;      return $retVal;
2064  }  }
2065    
2066  =head3 GenerateEntity  =head3 GenerateEntity
2067    
2068  C<< my $fieldHash = $database->GenerateEntity($id, $type, \%values); >>  C<< my $fieldHash = $erdb->GenerateEntity($id, $type, \%values); >>
2069    
2070  Generate the data for a new entity instance. This method creates a field hash suitable for  Generate the data for a new entity instance. This method creates a field hash suitable for
2071  passing as a parameter to L</InsertObject>. The ID is specified by the caller, but the rest  passing as a parameter to L</InsertObject>. The ID is specified by the caller, but the rest
# Line 1418  Line 2091 
2091    
2092  =item type  =item type
2093    
2094  Type name for the new entity.  Type name for the new entity.
2095    
2096    =item values
2097    
2098    Hash containing additional values that might be needed by the data generation methods (optional).
2099    
2100    =back
2101    
2102    =cut
2103    
sub GenerateEntity {
    # Build a field hash for a new instance of entity type $type with the
    # caller-supplied $id. $values is passed through to the field
    # generator unchanged.
    my ($self, $id, $type, $values) = @_;
    # The return hash starts out holding only the ID.
    my $this = { id => $id };
    # Look the entity type up in the metadata.
    my $entityTable = $self->{_metaData}->{Entities};
    if (exists $entityTable->{$type}) {
        # Generate data from the entity's fields.
        _GenerateFields($this, $entityTable->{$type}->{Fields}, $type, $values);
    } else {
        Confess("Unrecognized entity type $type in GenerateEntity.");
    }
    # Return the hash created.
    return $this;
}
2123    
2124    =head3 GetEntity
2125    
2126    C<< my $entityObject = $erdb->GetEntity($entityType, $ID); >>
2127    
2128    Return an object describing the entity instance with a specified ID.
2129    
2130    =over 4
2131    
2132    =item entityType
2133    
2134    Entity type name.
2135    
2136    =item ID
2137    
2138    ID of the desired entity.
2139    
2140    =item RETURN
2141    
2142    Returns a B<DBObject> representing the desired entity instance, or an undefined value if no
2143    instance is found with the specified key.
2144    
2145    =back
2146    
2147    =cut
2148    
2149    sub GetEntity {
2150        # Get the parameters.
2151        my ($self, $entityType, $ID) = @_;
2152        # Create a query.
2153        my $query = $self->Get([$entityType], "$entityType(id) = ?", [$ID]);
2154        # Get the first (and only) object.
2155        my $retVal = $query->Fetch();
2156        # Return the result.
2157        return $retVal;
2158    }
2159    
2160    =head3 GetChoices
2161    
2162    C<< my @values = $erdb->GetChoices($entityName, $fieldName); >>
2163    
2164    Return a list of all the values for the specified field that are represented in the
2165    specified entity.
2166    
2167    Note that if the field is not indexed, then this will be a very slow operation.
2168    
2169    =over 4
2170    
2171    =item entityName
2172    
2173    Name of an entity in the database.
2174    
2175    =item fieldName
2176    
2177    Name of a field belonging to the entity. This is a raw field name without
2178    the standard parenthesized notation used in most calls.
2179    
2180    =item RETURN
2181    
2182    Returns a list of the distinct values for the specified field in the database.
2183    
2184    =back
2185    
2186    =cut
2187    
2188    sub GetChoices {
2189        # Get the parameters.
2190        my ($self, $entityName, $fieldName) = @_;
2191        # Declare the return variable.
2192        my @retVal;
2193        # Get the entity data structure.
2194        my $entityData = $self->_GetStructure($entityName);
2195        # Get the field.
2196        my $fieldHash = $entityData->{Fields};
2197        if (! exists $fieldHash->{$fieldName}) {
2198            Confess("$fieldName not found in $entityName.");
2199        } else {
2200            # Get the name of the relation containing the field.
2201            my $relation = $fieldHash->{$fieldName}->{relation};
2202            # Fix up the field name.
2203            my $realName = _FixName($fieldName);
2204            # Get the database handle.
2205            my $dbh = $self->{_dbh};
2206            # Query the database.
2207            my $results = $dbh->SQL("SELECT DISTINCT $realName FROM $relation");
2208            # Clean the results. They are stored as a list of lists, and we just want the one list.
2209            @retVal = sort map { $_->[0] } @{$results};
2210        }
2211        # Return the result.
2212        return @retVal;
2213    }
2214    
2215    =head3 GetEntityValues
2216    
2217    C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >>
2218    
2219    Return a list of values from a specified entity instance. If the entity instance
2220    does not exist, an empty list is returned.
2221    
2222    =over 4
2223    
2224    =item entityType
2225    
2226    Entity type name.
2227    
2228    =item ID
2229    
2230    ID of the desired entity.
2231    
2232    =item fields
2233    
2234    List of field names, each of the form I<objectName>C<(>I<fieldName>C<)>.
2235    
2236    =item RETURN
2237    
2238    Returns a flattened list of the values of the specified fields for the specified entity.
2239    
2240    =back
2241    
2242    =cut
2243    
2244    sub GetEntityValues {
2245        # Get the parameters.
2246        my ($self, $entityType, $ID, $fields) = @_;
2247        # Get the specified entity.
2248        my $entity = $self->GetEntity($entityType, $ID);
2249        # Declare the return list.
2250        my @retVal = ();
2251        # If we found the entity, push the values into the return list.
2252        if ($entity) {
2253            push @retVal, $entity->Values($fields);
2254        }
2255        # Return the result.
2256        return @retVal;
2257    }
2258    
2259    =head3 GetAll
2260    
2261    C<< my @list = $erdb->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>
2262    
2263    Return a list of values taken from the objects returned by a query. The first three
2264    parameters correspond to the parameters of the L</Get> method. The final parameter is
2265    a list of the fields desired from each record found by the query. The field name
2266    syntax is the standard syntax used for fields in the B<ERDB> system--
2267    B<I<objectName>(I<fieldName>)>-- where I<objectName> is the name of the relevant entity
2268    or relationship and I<fieldName> is the name of the field.
2269    
2270    The list returned will be a list of lists. Each element of the list will contain
2271    the values returned for the fields specified in the fourth parameter. If one of the
2272    fields specified returns multiple values, they are flattened in with the rest. For
2273    example, the following call will return a list of the features in a particular
2274    spreadsheet cell, and each feature will be represented by a list containing the
2275    feature ID followed by all of its aliases.
2276    
2277    C<< @query = $erdb->GetAll(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>
2278    
2279    =over 4
2280    
2281    =item objectNames
2282    
2283    List containing the names of the entity and relationship objects to be retrieved.
2284    
2285    =item filterClause
2286    
2287    WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can
2288    be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form
2289    B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the
2290    parameter list as additional parameters. The fields in a filter clause can come from primary
2291    entity relations, relationship relations, or secondary entity relations; however, all of the
2292    entities and relationships involved must be included in the list of object names.
2293    
2294    =item parameterList
2295    
2296    List of the parameters to be substituted in for the parameter marks in the filter clause.
2297    
2298    =item fields
2299    
2300    List of the fields to be returned in each element of the list returned.
2301    
2302    =item count
2303    
2304    Maximum number of records to return. If omitted or 0, all available records will be returned.
2305    
2306    =item RETURN
2307    
2308    Returns a list of list references. Each element of the return list contains the values for the
2309    fields specified in the B<fields> parameter.
2310    
2311    =back
2312    
2313    =cut
2314    #: Return Type @@;
2315    sub GetAll {
2316        # Get the parameters.
2317        my ($self, $objectNames, $filterClause, $parameterList, $fields, $count) = @_;
2318        # Translate the parameters from a list reference to a list. If the parameter
2319        # list is a scalar we convert it into a singleton list.
2320        my @parmList = ();
2321        if (ref $parameterList eq "ARRAY") {
2322            Trace("GetAll parm list is an array.") if T(4);
2323            @parmList = @{$parameterList};
2324        } else {
2325            Trace("GetAll parm list is a scalar: $parameterList.") if T(4);
2326            push @parmList, $parameterList;
2327        }
2328        # Insure the counter has a value.
2329        if (!defined $count) {
2330            $count = 0;
2331        }
2332        # Add the row limit to the filter clause.
2333        if ($count > 0) {
2334            $filterClause .= " LIMIT $count";
2335        }
2336        # Create the query.
2337        my $query = $self->Get($objectNames, $filterClause, \@parmList);
2338        # Set up a counter of the number of records read.
2339        my $fetched = 0;
2340        # Loop through the records returned, extracting the fields. Note that if the
2341        # counter is non-zero, we stop when the number of records read hits the count.
2342        my @retVal = ();
2343        while (($count == 0 || $fetched < $count) && (my $row = $query->Fetch())) {
2344            my @rowData = $row->Values($fields);
2345            push @retVal, \@rowData;
2346            $fetched++;
2347        }
2348        Trace("$fetched rows returned in GetAll.") if T(SQL => 4);
2349        # Return the resulting list.
2350        return @retVal;
2351    }
2352    
2353    =head3 Exists
2354    
2355    C<< my $found = $sprout->Exists($entityName, $entityID); >>
2356    
2357    Return TRUE if an entity exists, else FALSE.
2358    
2359    =over 4
2360    
2361    =item entityName
2362    
2363    Name of the entity type (e.g. C<Feature>) relevant to the existence check.
2364    
2365    =item entityID
2366    
2367    ID of the entity instance whose existence is to be checked.
2368    
2369    =item RETURN
2370    
2371    Returns TRUE if the entity instance exists, else FALSE.
2372    
2373    =back
2374    
2375    =cut
2376    #: Return Type $;
2377    sub Exists {
2378        # Get the parameters.
2379        my ($self, $entityName, $entityID) = @_;
2380        # Check for the entity instance.
2381        Trace("Checking existence of $entityName with ID=$entityID.") if T(4);
2382        my $testInstance = $self->GetEntity($entityName, $entityID);
2383        # Return an existence indicator.
2384        my $retVal = ($testInstance ? 1 : 0);
2385        return $retVal;
2386    }
2387    
2388    =head3 EstimateRowSize
2389    
2390    C<< my $rowSize = $erdb->EstimateRowSize($relName); >>
2391    
2392    Estimate the row size of the specified relation. The estimated row size is computed by adding
2393    up the average length for each data type.
2394    
2395    =over 4
2396    
2397    =item relName
2398    
2399    Name of the relation whose estimated row size is desired.
2400    
2401    =item RETURN
2402    
2403    Returns an estimate of the row size for the specified relation.
2404    
2405    =back
2406    
2407    =cut
2408    #: Return Type $;
2409    sub EstimateRowSize {
2410        # Get the parameters.
2411        my ($self, $relName) = @_;
2412        # Declare the return variable.
2413        my $retVal = 0;
2414        # Find the relation descriptor.
2415        my $relation = $self->_FindRelation($relName);
2416        # Get the list of fields.
2417        for my $fieldData (@{$relation->{Fields}}) {
2418            # Get the field type and add its length.
2419            my $fieldLen = $TypeTable{$fieldData->{type}}->{avgLen};
2420            $retVal += $fieldLen;
2421        }
2422        # Return the result.
2423        return $retVal;
2424    }
2425    
2426    =head3 GetFieldTable
2427    
2428    C<< my $fieldHash = $self->GetFieldTable($objectName); >>
2429    
2430    Get the field structure for a specified entity or relationship.
2431    
2432    =over 4
2433    
2434    =item objectName
2435    
2436    Name of the desired entity or relationship.
2437    
2438    =item RETURN
2439    
2440    The table containing the field descriptors for the specified object.
2441    
2442    =back
2443    
2444    =cut
2445    
2446    sub GetFieldTable {
2447        # Get the parameters.
2448        my ($self, $objectName) = @_;
2449        # Get the descriptor from the metadata.
2450        my $objectData = $self->_GetStructure($objectName);
2451        # Return the object's field table.
2452        return $objectData->{Fields};
2453    }
2454    
2455    =head2 Data Mining Methods
2456    
2457    =head3 GetUsefulCrossValues
2458    
2459    C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >>
2460    
2461    Return a list of the useful attributes that would be returned by a B<Cross> call
2462    from an entity of the source entity type through the specified relationship. This
2463    means it will return the fields of the target entity type and the intersection data
2464    fields in the relationship. Only primary table fields are returned. In other words,
2465    the field names returned will be for fields where there is always one and only one
2466    value.
2467    
2468    =over 4
2469    
2470    =item sourceEntity
2471    
2472    Name of the entity from which the relationship crossing will start.
2473    
2474    =item relationship
2475    
2476    Name of the relationship being crossed.
2477    
2478    =item RETURN
2479    
2480    Returns a list of field names in Sprout field format (I<objectName>C<(>I<fieldName>C<)>).
2481    
2482    =back
2483    
2484    =cut
2485    #: Return Type @;
2486    sub GetUsefulCrossValues {
2487        # Get the parameters.
2488        my ($self, $sourceEntity, $relationship) = @_;
2489        # Declare the return variable.
2490        my @retVal = ();
2491        # Determine the target entity for the relationship. This is whichever entity is not
2492        # the source entity. So, if the source entity is the FROM, we'll get the name of
2493        # the TO, and vice versa.
2494        my $relStructure = $self->_GetStructure($relationship);
2495        my $targetEntityType = ($relStructure->{from} eq $sourceEntity ? "to" : "from");
2496        my $targetEntity = $relStructure->{$targetEntityType};
2497        # Get the field table for the entity.
2498        my $entityFields = $self->GetFieldTable($targetEntity);
2499        # The field table is a hash. The hash key is the field name. The hash value is a structure.
2500        # For the entity fields, the key aspect of the target structure is that the {relation} value
2501        # must match the entity name.
2502        my @fieldList = map { "$targetEntity($_)" } grep { $entityFields->{$_}->{relation} eq $targetEntity }
2503                            keys %{$entityFields};
2504        # Push the fields found onto the return variable.
2505        push @retVal, sort @fieldList;
2506        # Get the field table for the relationship.
2507        my $relationshipFields = $self->GetFieldTable($relationship);
2508        # Here we have a different rule. We want all the fields other than "from-link" and "to-link".
2509        # This may end up being an empty set.
2510        my @fieldList2 = map { "$relationship($_)" } grep { $_ ne "from-link" && $_ ne "to-link" }
2511                            keys %{$relationshipFields};
2512        # Push these onto the return list.
2513        push @retVal, sort @fieldList2;
2514        # Return the result.
2515        return @retVal;
2516    }
2517    
2518    =head3 FindColumn
2519    
2520    C<< my $colIndex = ERDB::FindColumn($headerLine, $columnIdentifier); >>
2521    
2522    Return the location of a desired column in a data mining header line. The data
2523    mining header line is a tab-separated list of column names. The column
2524    identifier is either the numerical index of a column or the actual column
2525    name.
2526    
2527    =over 4
2528    
2529    =item headerLine
2530    
2531    The header line from a data mining command, which consists of a tab-separated
2532    list of column names.
2533    
2534    =item columnIdentifier
2535    
2536    Either the ordinal number of the desired column (1-based), or the name of the
2537    desired column.
2538    
2539    =item RETURN
2540    
2541    Returns the array index (0-based) of the desired column.
2542    
2543    =back
2544    
2545    =cut
2546    
2547    sub FindColumn {
2548        # Get the parameters.
2549        my ($headerLine, $columnIdentifier) = @_;
2550        # Declare the return variable.
2551        my $retVal;
2552        # Split the header line into column names.
2553        my @headers = ParseColumns($headerLine);
2554        # Determine whether we have a number or a name.
2555        if ($columnIdentifier =~ /^\d+$/) {
2556            # Here we have a number. Subtract 1 and validate the result.
2557            $retVal = $columnIdentifier - 1;
2558            if ($retVal < 0 || $retVal > $#headers) {
2559                Confess("Invalid column identifer \"$columnIdentifier\": value out of range.");
2560            }
2561        } else {
2562            # Here we have a name. We need to find it in the list.
2563            for (my $i = 0; $i <= $#headers && ! defined($retVal); $i++) {
2564                if ($headers[$i] eq $columnIdentifier) {
2565                    $retVal = $i;
2566                }
2567            }
2568            if (! defined($retVal)) {
2569                Confess("Invalid column identifier \"$columnIdentifier\": value not found.");
2570            }
2571        }
2572        # Return the result.
2573        return $retVal;
2574    }
2575    
2576    =head3 ParseColumns
2577    
2578    C<< my @columns = ERDB::ParseColumns($line); >>
2579    
2580    Convert the specified data line to a list of columns.
2581    
2582    =over 4
2583    
2584    =item line
2585    
2586    A data mining input, consisting of a tab-separated list of columns terminated by a
2587    new-line.
2588    
2589  =item values  =item RETURN
2590    
2591  Hash containing additional values that might be needed by the data generation methods (optional).  Returns a list consisting of the column values.
2592    
2593  =back  =back
2594    
2595  =cut  =cut
2596    
2597  sub GenerateEntity {  sub ParseColumns {
2598      # Get the parameters.      # Get the parameters.
2599      my ($self, $id, $type, $values) = @_;      my ($line) = @_;
2600      # Create the return hash.      # Chop off the line-end.
2601      my $this = { id => $id };      chomp $line;
2602      # Get the metadata structure.      # Split it into a list.
2603      my $metadata = $self->{_metaData};      my @retVal = split(/\t/, $line);
2604      # Get this entity's list of fields.      # Return the result.
2605      if (!exists $metadata->{Entities}->{$type}) {      return @retVal;
         Confess("Unrecognized entity type $type in GenerateEntity.");  
     } else {  
         my $entity = $metadata->{Entities}->{$type};  
         my $fields = $entity->{Fields};  
         # Generate data from the fields.  
         _GenerateFields($this, $fields, $type, $values);  
     }  
     # Return the hash created.  
     return $this;  
2606  }  }
2607    
2608  =head3 GetEntity  =head2 Virtual Methods
   
 C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >>  
2609    
2610  Return an object describing the entity instance with a specified ID.  =head3 CleanKeywords
2611    
2612  =over 4  C<< my $cleanedString = $erdb->CleanKeywords($searchExpression); >>
2613    
2614  =item entityType  Clean up a search expression or keyword list. This is a virtual method that may
2615    be overridden by the subclass. The base-class method removes extra spaces
2616    and converts everything to lower case.
2617    
2618  Entity type name.  =over 4
2619    
2620  =item ID  =item searchExpression
2621    
2622  ID of the desired entity.  Search expression or keyword list to clean. Note that a search expression may
2623    contain boolean operators which need to be preserved. This includes leading
2624    minus signs.
2625    
2626  =item RETURN  =item RETURN
2627    
2628  Returns a B<DBObject> representing the desired entity instance, or an undefined value if no  Cleaned expression or keyword list.
 instance is found with the specified key.  
2629    
2630  =back  =back
2631    
2632  =cut  =cut
2633    
2634  sub GetEntity {  sub CleanKeywords {
2635      # Get the parameters.      # Get the parameters.
2636      my ($self, $entityType, $ID) = @_;      my ($self, $searchExpression) = @_;
2637      # Create a query.      # Lower-case the expression and copy it into the return variable. Note that we insure we
2638      my $query = $self->Get([$entityType], "$entityType(id) = ?", $ID);      # don't accidentally end up with an undefined value.
2639      # Get the first (and only) object.      my $retVal = lc($searchExpression || "");
2640      my $retVal = $query->Fetch();      # Remove extra spaces.
2641        $retVal =~ s/\s+/ /g;
2642        $retVal =~ s/(^\s+)|(\s+$)//g;
2643      # Return the result.      # Return the result.
2644      return $retVal;      return $retVal;
2645  }  }
2646    
2647  =head3 GetEntityValues  =head2 Internal Utility Methods
   
 C<< my @values = GetEntityValues($entityType, $ID, \@fields); >>  
2648    
2649  Return a list of values from a specified entity instance.  =head3 _RelationMap
2650    
2651  =over 4  C<< my @relationMap = _RelationMap($mappedNameHashRef, $mappedNameListRef); >>
2652    
2653  =item entityType  Create the relation map for an SQL query. The relation map is used by B<DBObject>
2654    to determine how to interpret the results of the query.
2655    
2656  Entity type name.  =over 4
2657    
2658  =item ID  =item mappedNameHashRef
2659    
2660  ID of the desired entity.  Reference to a hash that maps modified object names to real object names.
2661    
2662  =item fields  =item mappedNameListRef
2663    
2664  List of field names, each of the form I<objectName>C<(>I<fieldName>C<)>.  Reference to a list of modified object names in the order they appear in the
2665    SELECT list.
2666    
2667  =item RETURN  =item RETURN
2668    
2669  Returns a flattened list of the values of the specified fields for the specified entity.  Returns a list of 2-tuples. Each tuple consists of an object name as used in the
2670    query followed by the actual name of that object. This enables the B<DBObject> to
2671    determine the order of the tables in the query and which object name belongs to each
2672    mapped object name. Most of the time these two values are the same; however, if a
2673    relation occurs twice in the query, the relation name in the field list and WHERE
2674    clause will use a mapped name (generally the actual relation name with a numeric
2675    suffix) that does not match the actual relation name.
2676    
2677  =back  =back
2678    
2679  =cut  =cut
2680    
2681  sub GetEntityValues {  sub _RelationMap {
2682      # Get the parameters.      # Get the parameters.
2683      my ($self, $entityType, $ID, $fields) = @_;      my ($mappedNameHashRef, $mappedNameListRef) = @_;
2684      # Get the specified entity.      # Declare the return variable.
     my $entity = $self->GetEntity($entityType, $ID);  
     # Declare the return list.  
2685      my @retVal = ();      my @retVal = ();
2686      # If we found the entity, push the values into the return list.      # Build the map.
2687      if ($entity) {      for my $mappedName (@{$mappedNameListRef}) {
2688          push @retVal, $entity->Values($fields);          push @retVal, [$mappedName, $mappedNameHashRef->{$mappedName}];
2689      }      }
2690      # Return the result.      # Return it.
2691      return @retVal;      return @retVal;
2692  }  }
2693    
 =head3 GetAll  
   
 C<< my @list = $sprout->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>  
2694    
2695  Return a list of values taken from the objects returned by a query. The first three  =head3 _SetupSQL
 parameters correspond to the parameters of the L</Get> method. The final parameter is  
 a list of the fields desired from each record found by the query. The field name  
 syntax is the standard syntax used for fields in the B<ERDB> system--  
 B<I<objectName>(I<fieldName>)>-- where I<objectName> is the name of the relevant entity  
 or relationship and I<fieldName> is the name of the field.  
2696    
2697  The list returned will be a list of lists. Each element of the list will contain  Process a list of object names and a filter clause so that they can be used to
2698  the values returned for the fields specified in the fourth parameter. If one of the  build an SQL statement. This method takes in a reference to a list of object names
2699  fields specified returns multiple values, they are flattened in with the rest. For  and a filter clause. It will return a corrected filter clause, a list of mapped
2700  example, the following call will return a list of the features in a particular  names and the mapped name hash.
 spreadsheet cell, and each feature will be represented by a list containing the  
 feature ID followed by all of its aliases.  
2701    
2702  C<< $query = $sprout->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>  This is an instance method.
2703    
2704  =over 4  =over 4
2705    
2706  =item objectNames  =item objectNames
2707    
2708  List containing the names of the entity and relationship objects to be retrieved.  Reference to a list of the object names to be included in the query.
2709    
2710  =item filterClause  =item filterClause
2711    
2712  WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  A string containing the WHERE clause for the query (without the C<WHERE>) and also
2713  be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  optionally the C<ORDER BY> and C<LIMIT> clauses.
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
   
 List of the parameters to be substituted in for the parameters marks in the filter clause.  
   
 =item fields  
   
 List of the fields to be returned in each element of the list returned.  
2714    
2715  =item count  =item matchClause
2716    
2717  Maximum number of records to return. If omitted or 0, all available records will be returned.  An optional full-text search clause. If specified, it will be inserted at the
2718    front of the WHERE clause. It should already be SQL-formatted; that is, the
2719    field names should be in the form I<table>C<.>I<fieldName>.
2720    
2721  =item RETURN  =item RETURN
2722    
2723  Returns a list of list references. Each element of the return list contains the values for the  Returns a three-element list. The first element is the SQL statement suffix, beginning
2724  fields specified in the B<fields> parameter.  with the FROM clause. The second element is a reference to a list of the names to be
2725    used in retrieving the fields. The third element is a hash mapping the names to the
2726    objects they represent.
2727    
2728  =back  =back
2729    
2730  =cut  =cut
2731  #: Return Type @@;  
2732  sub GetAll {  sub _SetupSQL {
2733      # Get the parameters.      my ($self, $objectNames, $filterClause, $matchClause) = @_;
2734      my ($self, $objectNames, $filterClause, $parameterList, $fields, $count) = @_;      # Adjust the list of object names to account for multiple occurrences of the
2735      # Translate the parameters from a list reference to a list. If the parameter      # same object. We start with a hash table keyed on object name that will
2736      # list is a scalar we convert it into a singleton list.      # return the object suffix. The first time an object is encountered it will
2737      my @parmList = ();      # not be found in the hash. The next time the hash will map the object name
2738      if (ref $parameterList eq "ARRAY") {      # to 2, then 3, and so forth.
2739          @parmList = @{$parameterList};      my %objectHash = ();
2740        # This list will contain the object names as they are to appear in the
2741        # FROM list.
2742        my @fromList = ();
2743        # This list contains the suffixed object name for each object. It is exactly
2744        # parallel to the list in the $objectNames parameter.
2745        my @mappedNameList = ();
2746        # Finally, this hash translates from a mapped name to its original object name.
2747        my %mappedNameHash = ();
2748        # Now we create the lists. Note that for every single name we push something into
2749        # @fromList and @mappedNameList. This insures that those two arrays are exactly
2750        # parallel to $objectNames.
2751        for my $objectName (@{$objectNames}) {
2752            # Get the next suffix for this object.
2753            my $suffix = $objectHash{$objectName};
2754            if (! $suffix) {
2755                # Here we are seeing the object for the first time. The object name
2756                # is used as is.
2757                push @mappedNameList, $objectName;
2758                push @fromList, $objectName;
2759                $mappedNameHash{$objectName} = $objectName;
2760                # Denote the next suffix will be 2.
2761                $objectHash{$objectName} = 2;
2762      } else {      } else {
2763          push @parmList, $parameterList;              # Here we've seen the object before. We construct a new name using
2764                # the suffix from the hash and update the hash.
2765                my $mappedName = "$objectName$suffix";
2766                $objectHash{$objectName} = $suffix + 1;
2767                # The FROM list has the object name followed by the mapped name. This
2768                # tells SQL it's still the same table, but we're using a different name
2769                # for it to avoid confusion.
2770                push @fromList, "$objectName $mappedName";
2771                # The mapped-name list contains the real mapped name.
2772                push @mappedNameList, $mappedName;
2773                # Finally, enable us to get back from the mapped name to the object name.
2774                $mappedNameHash{$mappedName} = $objectName;
2775      }      }
     # Create the query.  
     my $query = $self->Get($objectNames, $filterClause, @parmList);  
     # Set up a counter of the number of records read.  
     my $fetched = 0;  
     # Insure the counter has a value.  
     if (!defined $count) {  
         $count = 0;  
2776      }      }
2777      # Loop through the records returned, extracting the fields. Note that if the      # Begin the SELECT suffix. It starts with
2778      # counter is non-zero, we stop when the number of records read hits the count.      #
2779      my @retVal = ();      # FROM name1, name2, ... nameN
2780      while (($count == 0 || $fetched < $count) && (my $row = $query->Fetch())) {      #
2781          my @rowData = $row->Values($fields);      my $suffix = "FROM " . join(', ', @fromList);
2782          push @retVal, \@rowData;      # Now for the WHERE. First, we need a place for the filter string.
2783          $fetched++;      my $filterString = "";
2784        # We will also keep a list of conditions to add to the WHERE clause in order to link
2785        # entities and relationships as well as primary relations to secondary ones.
2786        my @joinWhere = ();
2787        # Check for a filter clause.
2788        if ($filterClause) {
2789            # Here we have one, so we convert its field names and add it to the query. First,
2790            # We create a copy of the filter string we can work with.
2791            $filterString = $filterClause;
2792            # Next, we sort the object names by length. This helps protect us from finding
2793            # object names inside other object names when we're doing our search and replace.
2794            my @sortedNames = sort { length($b) - length($a) } @mappedNameList;
2795            # The final preparatory step is to create a hash table of relation names. The
2796            # table begins with the relation names already in the SELECT command. We may
2797            # need to add relations later if there is filtering on a field in a secondary
2798            # relation. The secondary relations are the ones that contain multiply-
2799            # occurring or optional fields.
2800            my %fromNames = map { $_ => 1 } @sortedNames;
2801            # We are ready to begin. We loop through the object names, replacing each
2802            # object name's field references by the corresponding SQL field reference.
2803            # Along the way, if we find a secondary relation, we will need to add it
2804            # to the FROM clause.
2805            for my $mappedName (@sortedNames) {
2806                # Get the length of the object name plus 2. This is the value we add to the
2807                # size of the field name to determine the size of the field reference as a
2808                # whole.
2809                my $nameLength = 2 + length $mappedName;
2810                # Get the real object name for this mapped name.
2811                my $objectName = $mappedNameHash{$mappedName};
2812                Trace("Processing $mappedName for object $objectName.") if T(4);
2813                # Get the object's field list.
2814                my $fieldList = $self->GetFieldTable($objectName);
2815                # Find the field references for this object.
2816                while ($filterString =~ m/$mappedName\(([^)]*)\)/g) {
2817                    # At this point, $1 contains the field name, and the current position
2818                    # is set immediately after the final parenthesis. We pull out the name of
2819                    # the field and the position and length of the field reference as a whole.
2820                    my $fieldName = $1;
2821                    my $len = $nameLength + length $fieldName;
2822                    my $pos = pos($filterString) - $len;
2823                    # Insure the field exists.
2824                    if (!exists $fieldList->{$fieldName}) {
2825                        Confess("Field $fieldName not found for object $objectName.");
2826                    } else {
2827                        Trace("Processing $fieldName at position $pos.") if T(4);
2828                        # Get the field's relation.
2829                        my $relationName = $fieldList->{$fieldName}->{relation};
2830                        # Now we have a secondary relation. We need to insure it matches the
2831                        # mapped name of the primary relation. First we peel off the suffix
2832                        # from the mapped name.
2833                        my $mappingSuffix = substr $mappedName, length($objectName);
2834                        # Put the mapping suffix onto the relation name to get the
2835                        # mapped relation name.
2836                        my $mappedRelationName = "$relationName$mappingSuffix";
2837                        # Insure the relation is in the FROM clause.
2838                        if (!exists $fromNames{$mappedRelationName}) {
2839                            # Add the relation to the FROM clause.
2840                            if ($mappedRelationName eq $relationName) {
2841                                # The name is un-mapped, so we add it without
2842                                # any frills.
2843                                $suffix .= ", $relationName";
2844                                push @joinWhere, "$objectName.id = $relationName.id";
2845                            } else {
2846                                # Here we have a mapping situation.
2847                                $suffix .= ", $relationName $mappedRelationName";
2848                                push @joinWhere, "$mappedRelationName.id = $mappedName.id";
2849      }      }
2850      # Return the resulting list.                          # Denote we have this relation available for future fields.
2851      return @retVal;                          $fromNames{$mappedRelationName} = 1;
2852                        }
2853                        # Form an SQL field reference from the relation name and the field name.
2854                        my $sqlReference = "$mappedRelationName." . _FixName($fieldName);
2855                        # Put it into the filter string in place of the old value.
2856                        substr($filterString, $pos, $len) = $sqlReference;
2857                        # Reposition the search.
2858                        pos $filterString = $pos + length $sqlReference;
2859                    }
2860                }
2861            }
2862        }
2863        # The next step is to join the objects together. We only need to do this if there
2864        # is more than one object in the object list. We start with the first object and
2865        # run through the objects after it. Note also that we make a safety copy of the
2866        # list before running through it, because we shift off the first object before
2867        # processing the rest.
2868        my @mappedObjectList = @mappedNameList;
2869        my $lastMappedObject = shift @mappedObjectList;
2870        # Get the join table.
2871        my $joinTable = $self->{_metaData}->{Joins};
2872        # Loop through the object list.
2873        for my $thisMappedObject (@mappedObjectList) {
2874            # Look for a join using the real object names.
2875            my $lastObject = $mappedNameHash{$lastMappedObject};
2876            my $thisObject = $mappedNameHash{$thisMappedObject};
2877            my $joinKey = "$lastObject/$thisObject";
2878            if (!exists $joinTable->{$joinKey}) {
2879                # Here there's no join, so we throw an error.
2880                Confess("No join exists to connect from $lastMappedObject to $thisMappedObject.");
2881            } else {
2882                # Get the join clause.
2883                my $unMappedJoin = $joinTable->{$joinKey};
2884                # Fix the names.
2885                $unMappedJoin =~ s/$lastObject/$lastMappedObject/;
2886                $unMappedJoin =~ s/$thisObject/$thisMappedObject/;
2887                push @joinWhere, $unMappedJoin;
2888                # Save this object as the last object for the next iteration.
2889                $lastMappedObject = $thisMappedObject;
2890            }
2891        }
2892        # Now we need to handle the whole ORDER BY / LIMIT thing. The important part
2893        # here is we want the filter clause to be empty if there's no WHERE filter.
2894        # We'll put the ORDER BY / LIMIT clauses in the following variable.
2895        my $orderClause = "";
2896        # This is only necessary if we have a filter string in which the ORDER BY
2897        # and LIMIT clauses can live.
2898        if ($filterString) {
2899            # Locate the ORDER BY or LIMIT verbs (if any). We use a non-greedy
2900            # operator so that we find the first occurrence of either verb.
2901            if ($filterString =~ m/^(.*?)\s*(ORDER BY|LIMIT)/g) {
2902                # Here we have an ORDER BY or LIMIT verb. Split it off of the filter string.
2903                my $pos = pos $filterString;
2904                $orderClause = $2 . substr($filterString, $pos);
2905                $filterString = $1;
2906            }
2907        }
2908        # All the things that are supposed to be in the WHERE clause of the
2909        # SELECT command need to be put into @joinWhere so we can string them
2910        # together. We begin with the match clause. This is important,
2911        # because the match clause's parameter mark must precede any parameter
2912        # marks in the filter string.
2913        if ($matchClause) {
2914            push @joinWhere, $matchClause;
2915        }
2916        # Add the filter string. We put it in parentheses to avoid operator
2917        # precedence problems with the match clause or the joins.
2918        if ($filterString) {
2919            Trace("Filter string is \"$filterString\".") if T(4);
2920            push @joinWhere, "($filterString)";
2921        }
2922        # String it all together into a big filter clause.
2923        if (@joinWhere) {
2924            $suffix .= " WHERE " . join(' AND ', @joinWhere);
2925        }
2926        # Add the sort or limit clause (if any).
2927        if ($orderClause) {
2928            $suffix .= " $orderClause";
2929        }
2930        # Return the suffix, the mapped name list, and the mapped name hash.
2931        return ($suffix, \@mappedNameList, \%mappedNameHash);
2932  }  }
2933    
2934  =head2 Internal Utility Methods  =head3 _GetStatementHandle
2935    
2936    This method will prepare and execute an SQL query, returning the statement handle.
2937    The main reason for doing this here is so that everybody who does SQL queries gets
2938    the benefit of tracing.
2939    
2940    This is an instance method.
2941    
2942    =over 4
2943    
2944    =item command
2945    
2946    Command to prepare and execute.
2947    
2948    =item params
2949    
2950    Reference to a list of the values to be substituted in for the parameter marks.
2951    
2952    =item RETURN
2953    
2954    Returns a prepared and executed statement handle from which the caller can extract
2955    results.
2956    
2957    =back
2958    
2959    =cut
2960    
sub _GetStatementHandle {
    # Prepare and execute an SQL command and return the executed statement
    # handle. Routing queries through this method means every caller gets
    # the same SQL tracing.
    #
    # command   SQL command to prepare and execute
    # params    reference to a list of the values to substitute for the
    #           parameter marks; an omitted or undefined value is treated
    #           as an empty list
    # RETURN    the statement handle, after a successful execute
    #
    # Get the parameters.
    my ($self, $command, $params) = @_;
    # Tolerate a missing parameter list, so the dereferences below cannot
    # die on an undefined value when the command has no parameter marks.
    $params = [] if ! defined $params;
    # Trace the query.
    Trace("SQL query: $command") if T(SQL => 3);
    Trace("PARMS: '" . (join "', '", @{$params}) . "'") if (T(SQL => 4) && (@{$params} > 0));
    # Get the database handle.
    my $dbh = $self->{_dbh};
    # Prepare the command.
    my $sth = $dbh->prepare_command($command);
    # Execute it with the parameters bound in. The separator keeps the fixed
    # text from running straight into the driver's error string.
    $sth->execute(@{$params}) or Confess("SELECT error: " . $sth->errstr());
    # Return the statement handle.
    return $sth;
}
2976    
2977  =head3 GetLoadStats  =head3 _GetLoadStats
2978    
2979  Return a blank statistics object for use by the load methods.  Return a blank statistics object for use by the load methods.
2980    
# Line 1625  Line 2983 
2983  =cut  =cut
2984    
2985  sub _GetLoadStats {  sub _GetLoadStats {
2986      return Stats->new('records');      return Stats->new();
2987  }  }
2988    
2989  =head3 GenerateFields  =head3 _GenerateFields
2990    
2991  Generate field values from a field structure and store in a specified table. The field names  Generate field values from a field structure and store in a specified table. The field names
2992  are first sorted by pass count, certain pre-defined fields are removed from the list, and  are first sorted by pass count, certain pre-defined fields are removed from the list, and
# Line 1702  Line 3060 
3060      }      }
3061  }  }
3062    
3063  =head3 DumpRelation  =head3 _DumpRelation
3064    
3065  Dump the specified relation to the specified output file in tab-delimited format.  Dump the specified relation to the specified output file in tab-delimited format.
3066    
# Line 1752  Line 3110 
3110      close DTXOUT;      close DTXOUT;
3111  }  }
3112    
3113  =head3 GetStructure  =head3 _GetStructure
3114    
3115  Get the data structure for a specified entity or relationship.  Get the data structure for a specified entity or relationship.
3116    
# Line 1791  Line 3149 
3149      return $retVal;      return $retVal;
3150  }  }
3151    
3152  =head3 GetRelationTable  
3153    
3154    =head3 _GetRelationTable
3155    
3156  Get the list of relations for a specified entity or relationship.  Get the list of relations for a specified entity or relationship.
3157    
# Line 1820  Line 3180 
3180      return $objectData->{Relations};      return $objectData->{Relations};
3181  }  }
3182    
3183  =head3 GetFieldTable  =head3 _ValidateFieldNames
   
 Get the field structure for a specified entity or relationship.  
   
 This is an instance method.  
   
 =over 4  
   
 =item objectName  
   
 Name of the desired entity or relationship.  
   
 =item RETURN  
   
 The table containing the field descriptors for the specified object.  
   
 =back  
   
 =cut  
   
 sub _GetFieldTable {  
     # Get the parameters.  
     my ($self, $objectName) = @_;  
     # Get the descriptor from the metadata.  
     my $objectData = $self->_GetStructure($objectName);  
     # Return the object's field table.  
     return $objectData->{Fields};  
 }  
   
 =head3 ValidateFieldNames  
3184    
3185  Determine whether or not the field names are valid. A description of the problems with the names  Determine whether or not the field names are valid. A description of the problems with the names
3186  will be written to the standard error output. If there is an error, this method will abort. This is  will be written to the standard error output. If there is an error, this method will abort. This is
# Line 1904  Line 3235 
3235      }      }
3236  }  }
3237    
3238  =head3 LoadRelation  =head3 _LoadRelation
3239    
3240  Load a relation from the data in a tab-delimited disk file. The load will only take place if a disk  Load a relation from the data in a tab-delimited disk file. The load will only take place if a disk
3241  file with the same name as the relation exists in the specified directory.  file with the same name as the relation exists in the specified directory.
# Line 1964  Line 3295 
3295      return $retVal;      return $retVal;
3296  }  }
3297    
3298  =head3 LoadMetaData  =head3 _LoadMetaData
3299    
3300  This method loads the data describing this database from an XML file into a metadata structure.  This method loads the data describing this database from an XML file into a metadata structure.
3301  The resulting structure is a set of nested hash tables containing all the information needed to  The resulting structure is a set of nested hash tables containing all the information needed to
# Line 1989  Line 3320 
3320  sub _LoadMetaData {  sub _LoadMetaData {
3321      # Get the parameters.      # Get the parameters.
3322      my ($filename) = @_;      my ($filename) = @_;
3323        Trace("Reading Sprout DBD from $filename.") if T(2);
3324      # Slurp the XML file into a variable. Extensive use of options is used to insure we      # Slurp the XML file into a variable. Extensive use of options is used to insure we
3325      # get the exact structure we want.      # get the exact structure we want.
3326      my $metadata = XML::Simple::XMLin($filename,      my $metadata = XML::Simple::XMLin($filename,
# Line 2195  Line 3527 
3527          my @fromList = ();          my @fromList = ();
3528          my @toList = ();          my @toList = ();
3529          my @bothList = ();          my @bothList = ();
3530          Trace("Join table build for $entityName.") if T(3);          Trace("Join table build for $entityName.") if T(metadata => 4);
3531          for my $relationshipName (keys %{$relationshipList}) {          for my $relationshipName (keys %{$relationshipList}) {
3532              my $relationship = $relationshipList->{$relationshipName};              my $relationship = $relationshipList->{$relationshipName};
3533              # Determine if this relationship has our entity in one of its link fields.              # Determine if this relationship has our entity in one of its link fields.
3534              my $fromEntity = $relationship->{from};              my $fromEntity = $relationship->{from};
3535              my $toEntity = $relationship->{to};              my $toEntity = $relationship->{to};
3536              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(3);              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(Joins => 4);
3537              if ($fromEntity eq $entityName) {              if ($fromEntity eq $entityName) {
3538                  if ($toEntity eq $entityName) {                  if ($toEntity eq $entityName) {
3539                      # Here the relationship is recursive.                      # Here the relationship is recursive.
3540                      push @bothList, $relationshipName;                      push @bothList, $relationshipName;
3541                      Trace("Relationship $relationshipName put in both-list.") if T(3);                      Trace("Relationship $relationshipName put in both-list.") if T(metadata => 4);
3542                  } else {                  } else {
3543                      # Here the relationship comes from the entity.                      # Here the relationship comes from the entity.
3544                      push @fromList, $relationshipName;                      push @fromList, $relationshipName;
3545                      Trace("Relationship $relationshipName put in from-list.") if T(3);                      Trace("Relationship $relationshipName put in from-list.") if T(metadata => 4);
3546                  }                  }
3547              } elsif ($toEntity eq $entityName) {              } elsif ($toEntity eq $entityName) {
3548                  # Here the relationship goes to the entity.                  # Here the relationship goes to the entity.
3549                  push @toList, $relationshipName;                  push @toList, $relationshipName;
3550                  Trace("Relationship $relationshipName put in to-list.") if T(3);                  Trace("Relationship $relationshipName put in to-list.") if T(metadata => 4);
3551              }              }
3552          }          }
3553          # Create the nonrecursive joins. Note that we build two hashes for running          # Create the nonrecursive joins. Note that we build two hashes for running
# Line 2231  Line 3563 
3563                  # Create joins between the entity and this relationship.                  # Create joins between the entity and this relationship.
3564                  my $linkField = "$relationshipName.${linkType}_link";                  my $linkField = "$relationshipName.${linkType}_link";
3565                  my $joinClause = "$entityName.id = $linkField";                  my $joinClause = "$entityName.id = $linkField";
3566                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(4);                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(metadata => 4);
3567                  $joinTable{"$entityName/$relationshipName"} = $joinClause;                  $joinTable{"$entityName/$relationshipName"} = $joinClause;
3568                  $joinTable{"$relationshipName/$entityName"} = $joinClause;                  $joinTable{"$relationshipName/$entityName"} = $joinClause;
3569                  # Create joins between this relationship and the other relationships.                  # Create joins between this relationship and the other relationships.
# Line 2252  Line 3584 
3584                              # relationship and itself are prohibited.                              # relationship and itself are prohibited.
3585                              my $relJoinClause = "$otherName.${otherType}_link = $linkField";                              my $relJoinClause = "$otherName.${otherType}_link = $linkField";
3586                              $joinTable{$joinKey} = $relJoinClause;                              $joinTable{$joinKey} = $relJoinClause;
3587                              Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(4);                              Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(metadata => 4);
3588                          }                          }
3589                      }                      }
3590                  }                  }
# Line 2261  Line 3593 
3593                  # relationship can only be ambiguous with another recursive relationship,                  # relationship can only be ambiguous with another recursive relationship,
3594                  # and the incoming relationship from the outer loop is never recursive.                  # and the incoming relationship from the outer loop is never recursive.
3595                  for my $otherName (@bothList) {                  for my $otherName (@bothList) {
3596                      Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(3);                      Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(metadata => 4);
3597                      # Join from the left.                      # Join from the left.
3598                      $joinTable{"$relationshipName/$otherName"} =                      $joinTable{"$relationshipName/$otherName"} =
3599                          "$linkField = $otherName.from_link";                          "$linkField = $otherName.from_link";
# Line 2276  Line 3608 
3608          # rise to situations where we can't create the path we want; however, it is always          # rise to situations where we can't create the path we want; however, it is always
3609          # possible to get the same effect using multiple queries.          # possible to get the same effect using multiple queries.
3610          for my $relationshipName (@bothList) {          for my $relationshipName (@bothList) {
3611              Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(3);              Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(metadata => 4);
3612              # Join to the entity from each direction.              # Join to the entity from each direction.
3613              $joinTable{"$entityName/$relationshipName"} =              $joinTable{"$entityName/$relationshipName"} =
3614                  "$entityName.id = $relationshipName.from_link";                  "$entityName.id = $relationshipName.from_link";
# Line 2290  Line 3622 
3622      return $metadata;      return $metadata;
3623  }  }
3624    
3625  =head3 CreateRelationshipIndex  =head3 _CreateRelationshipIndex
3626    
3627  Create an index for a relationship's relation.  Create an index for a relationship's relation.
3628    
# Line 2335  Line 3667 
3667      _AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex);      _AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex);
3668  }  }
3669    
3670  =head3 AddIndex  =head3 _AddIndex
3671    
3672  Add an index to a relation structure.  Add an index to a relation structure.
3673    
# Line 2381  Line 3713 
3713      $relationStructure->{Indexes}->{$indexName} = $newIndex;      $relationStructure->{Indexes}->{$indexName} = $newIndex;
3714  }  }
3715    
3716  =head3 FixupFields  =head3 _FixupFields
3717    
3718  This method fixes the field list for an entity or relationship. It will add the caller-specified  This method fixes the field list for an entity or relationship. It will add the caller-specified
3719  relation name to fields that do not have a name and set the C<PrettySort> value as specified.  relation name to fields that do not have a name and set the C<PrettySort> value as specified.
# Line 2419  Line 3751 
3751          # Here it doesn't, so we create a new one.          # Here it doesn't, so we create a new one.
3752          $structure->{Fields} = { };          $structure->{Fields} = { };
3753      } else {      } else {
3754          # Here we have a field list. Loop through its fields.          # Here we have a field list. We need to track the searchable fields, so we
3755            # create a list for stashing them.
3756            my @textFields = ();
3757            # Loop through the fields.
3758          my $fieldStructures = $structure->{Fields};          my $fieldStructures = $structure->{Fields};
3759          for my $fieldName (keys %{$fieldStructures}) {          for my $fieldName (keys %{$fieldStructures}) {
3760              Trace("Processing field $fieldName of $defaultRelationName.") if T(4);              Trace("Processing field $fieldName of $defaultRelationName.") if T(4);
# Line 2433  Line 3768 
3768                  # The data generator will use the default for the field's type.                  # The data generator will use the default for the field's type.
3769                  $fieldData->{DataGen} = { content => $TypeTable{$type}->{dataGen} };                  $fieldData->{DataGen} = { content => $TypeTable{$type}->{dataGen} };
3770              }              }
3771                # Check for searchability.
3772                if ($fieldData->{searchable}) {
3773                    # Only allow this for a primary relation.
3774                    if ($fieldData->{relation} ne $defaultRelationName) {
3775                        Confess("Field $fieldName of $defaultRelationName is in secondary relations and cannot be searchable.");
3776                    } else {
3777                        push @textFields, $fieldName;
3778                    }
3779                }
3780              # Plug in the defaults for the optional data generation parameters.              # Plug in the defaults for the optional data generation parameters.
3781              Tracer::MergeOptions($fieldData->{DataGen}, { testCount => 1, pass => 0 });              Tracer::MergeOptions($fieldData->{DataGen}, { testCount => 1, pass => 0 });
3782              # Add the PrettySortValue.              # Add the PrettySortValue.
3783              $fieldData->{PrettySort} = (($type eq "text") ? $textPrettySortValue : $prettySortValue);              $fieldData->{PrettySort} = (($type eq "text") ? $textPrettySortValue : $prettySortValue);
3784          }          }
3785            # If there are searchable fields, remember the fact.
3786            if (@textFields) {
3787                $structure->{searchFields} = \@textFields;
3788            }
3789      }      }
3790  }  }
3791    
3792  =head3 FixName  =head3 _FixName
3793    
3794  Fix the incoming field name so that it is a legal SQL column name.  Fix the incoming field name so that it is a legal SQL column name.
3795    
# Line 2470  Line 3818 
3818      return $fieldName;      return $fieldName;
3819  }  }
3820    
3821  =head3 FixNames  =head3 _FixNames
3822    
3823  Fix all the field names in a list.  Fix all the field names in a list.
3824    
# Line 2501  Line 3849 
3849      return @result;      return @result;
3850  }  }
3851    
3852  =head3 AddField  =head3 _AddField
3853    
3854  Add a field to a field list.  Add a field to a field list.
3855    
# Line 2536  Line 3884 
3884      $fieldList->{$fieldName} = $fieldStructure;      $fieldList->{$fieldName} = $fieldStructure;
3885  }  }
3886    
3887  =head3 ReOrderRelationTable  =head3 _ReOrderRelationTable
3888    
3889  This method will take a relation table and re-sort it according to the implicit ordering of the  This method will take a relation table and re-sort it according to the implicit ordering of the
3890  C<PrettySort> property. Instead of a hash based on field names, it will return a list of fields.  C<PrettySort> property. Instead of a hash based on field names, it will return a list of fields.
# Line 2597  Line 3945 
3945    
3946  }  }
3947    
3948  =head3 IsPrimary  =head3 _IsPrimary
3949    
3950  Return TRUE if a specified relation is a primary relation, else FALSE. A relation is primary  Return TRUE if a specified relation is a primary relation, else FALSE. A relation is primary
3951  if it has the same name as an entity or relationship.  if it has the same name as an entity or relationship.
# Line 2633  Line 3981 
3981      return $retVal;      return $retVal;
3982  }  }
3983    
3984  =head3 FindRelation  =head3 _FindRelation
3985    
3986  Return the descriptor for the specified relation.  Return the descriptor for the specified relation.
3987    
# Line 2664  Line 4012 
4012    
4013  =head2 HTML Documentation Utility Methods  =head2 HTML Documentation Utility Methods
4014    
4015  =head3 ComputeRelationshipSentence  =head3 _ComputeRelationshipSentence
4016    
4017  The relationship sentence consists of the relationship name between the names of the  The relationship sentence consists of the relationship name between the names of the
4018  two related entities and an arity indicator.  two related entities and an arity indicator.
# Line 2702  Line 4050 
4050      return $result;      return $result;
4051  }  }
4052    
4053  =head3 ComputeRelationshipHeading  =head3 _ComputeRelationshipHeading
4054    
4055  The relationship heading is the L<relationship sentence|/ComputeRelationshipSentence> with the entity  The relationship heading is the L<relationship sentence|/ComputeRelationshipSentence> with the entity
4056  names hyperlinked to the appropriate entity sections of the document.  names hyperlinked to the appropriate entity sections of the document.
# Line 2739  Line 4087 
4087      return $result;      return $result;
4088  }  }
4089    
4090  =head3 ShowRelationTable  =head3 _ShowRelationTable
4091    
4092  Generate the HTML string for a particular relation. The relation's data will be formatted as an HTML  Generate the HTML string for a particular relation. The relation's data will be formatted as an HTML
4093  table with three columns-- the field name, the field type, and the field description.  table with three columns-- the field name, the field type, and the field description.
# Line 2800  Line 4148 
4148      $htmlString .= "</ul>\n";      $htmlString .= "</ul>\n";
4149  }  }
4150    
4151  =head3 OpenFieldTable  =head3 _OpenFieldTable
4152    
4153  This method creates the header string for the field table generated by L</ShowMetaData>.  This method creates the header string for the field table generated by L</ShowMetaData>.
4154    
# Line 2825  Line 4173 
4173      return _OpenTable($tablename, 'Field', 'Type', 'Description');      return _OpenTable($tablename, 'Field', 'Type', 'Description');
4174  }  }
4175    
4176  =head3 OpenTable  =head3 _OpenTable
4177    
4178  This method creates the header string for an HTML table.  This method creates the header string for an HTML table.
4179    
# Line 2865  Line 4213 
4213      return $htmlString;      return $htmlString;
4214  }  }
4215    
4216  =head3 CloseTable  =head3 _CloseTable
4217    
4218  This method returns the HTML for closing a table.  This method returns the HTML for closing a table.
4219    
# Line 2877  Line 4225 
4225      return "</table></p>\n";      return "</table></p>\n";
4226  }  }
4227    
4228  =head3 ShowField  =head3 _ShowField
4229    
4230  This method returns the HTML for displaying a row of field information in a field table.  This method returns the HTML for displaying a row of field information in a field table.
4231    
# Line 2912  Line 4260 
4260      return $htmlString;      return $htmlString;
4261  }  }
4262    
4263  =head3 HTMLNote  =head3 _HTMLNote
4264    
4265  Convert a note or comment to HTML by replacing some bulletin-board codes with HTML. The codes  Convert a note or comment to HTML by replacing some bulletin-board codes with HTML. The codes
4266  supported are C<[b]> for B<bold>, C<[i]> for I<italics>, and C<[p]> for a new paragraph.  supported are C<[b]> for B<bold>, C<[i]> for I<italics>, and C<[p]> for a new paragraph.

Legend:
Removed from v.1.14  
changed lines
  Added in v.1.71

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3