[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5, Tue Apr 5 05:17:01 2005 UTC revision 1.44, Sat May 27 02:02:28 2006 UTC
# Line 1  Line 1 
1  package ERDB;  package ERDB;
2    
3          use strict;          use strict;
         use Carp;  
4          use Tracer;          use Tracer;
5          use DBKernel;      use DBrtns;
6          use Data::Dumper;          use Data::Dumper;
7          use XML::Simple;          use XML::Simple;
8          use DBQuery;          use DBQuery;
9          use DBObject;          use DBObject;
10          use Stats;          use Stats;
11          use Time::HiRes qw(gettimeofday);          use Time::HiRes qw(gettimeofday);
12        use Digest::MD5 qw(md5_base64);
13        use FIG;
14    
15  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
16    
# Line 33  Line 34 
34  relation that contains two fields-- the feature ID (C<id>) and the alias name (C<alias>).  relation that contains two fields-- the feature ID (C<id>) and the alias name (C<alias>).
35  The B<FEATURE> entity also contains an optional virulence number. This is implemented  The B<FEATURE> entity also contains an optional virulence number. This is implemented
36  as a separate relation C<FeatureVirulence> which contains an ID (C<id>) and a virulence number  as a separate relation C<FeatureVirulence> which contains an ID (C<id>) and a virulence number
37  (C<virulence>). If the virulence of a feature I<ABC> is known to be 6, there will be one row in the  (C<virulence>). If the virulence of a feature I<ABC> is known to be 6, there will be one row in
38  C<FeatureVirulence> relation possessing the value I<ABC> as its ID and 6 as its virulence number.  the C<FeatureVirulence> relation possessing the value I<ABC> as its ID and 6 as its virulence
39  If the virulence of I<ABC> is not known, there will not be any rows for it in C<FeatureVirulence>.  number. If the virulence of I<ABC> is not known, there will not be any rows for it in
40    C<FeatureVirulence>.
41    
42  Entities are connected by binary relationships implemented using single relations possessing the  Entities are connected by binary relationships implemented using single relations possessing the
43  same name as the relationship itself and that has an I<arity> of 1-to-1 (C<11>), 1-to-many (C<1M>),  same name as the relationship itself and that has an I<arity> of 1-to-1 (C<11>), 1-to-many (C<1M>),
# Line 70  Line 72 
72  is described in the L</GenerateEntity> and L</GenerateConnection> methods, though it is not yet  is described in the L</GenerateEntity> and L</GenerateConnection> methods, though it is not yet
73  fully implemented.  fully implemented.
74    
75    =head2 XML Database Description
76    
77    =head3 Data Types
78    
79    The ERDB system supports the following data types. Note that there are numerous string
80    types depending on the maximum length. Some database packages limit the total number of
81    characters you have in an index key; to insure the database works in all environments,
82    the type of string should be the shortest one possible that supports all the known values.
83    
84    =over 4
85    
86    =item char
87    
88    single ASCII character
89    
90    =item int
91    
92    32-bit signed integer
93    
94    =item date
95    
96    64-bit unsigned integer, representing a PERL date/time value
97    
98    =item text
99    
100    long string; Text fields cannot be used in indexes or sorting and do not support the
101    normal syntax of filter clauses, but can be up to a billion character in length
102    
103    =item float
104    
105    double-precision floating-point number
106    
107    =item boolean
108    
109    single-bit numeric value; The value is stored as a 16-bit signed integer (for
110    compatability with certain database packages), but the only values supported are
111    0 and 1.
112    
113    =item id-string
114    
115    variable-length string, maximum 25 characters
116    
117    =item key-string
118    
119    variable-length string, maximum 40 characters
120    
121    =item name-string
122    
123    variable-length string, maximum 80 characters
124    
125    =item medium-string
126    
127    variable-length string, maximum 160 characters
128    
129    =item string
130    
131    variable-length string, maximum 255 characters
132    
133    =item hash-string
134    
135    variable-length string, maximum 22 characters
136    
137    =back
138    
139    The hash-string data type has a special meaning. The actual key passed into the loader will
140    be a string, but it will be digested into a 22-character MD5 code to save space. Although the
141    MD5 algorithm is not perfect, it is extremely unlikely two strings will have the same
142    digest. Therefore, it is presumed the keys will be unique. When the database is actually
143    in use, the hashed keys will be presented rather than the original values. For this reason,
144    they should not be used for entities where the key is meaningful.
145    
146    =head3 Global Tags
147    
148    The entire database definition must be inside a B<Database> tag. The display name of
149    the database is given by the text associated with the B<Title> tag. The display name
150    is only used in the automated documentation. It has no other effect. The entities and
151    relationships are listed inside the B<Entities> and B<Relationships> tags,
152    respectively. None of these tags have attributes.
153    
154        <Database>
155            <Title>... display title here...</Title>
156            <Entities>
157                ... entity definitions here ...
158            </Entities>
159            <Relationships>
160                ... relationship definitions here...
161            </Relationships>
162        </Database>
163    
164    Entities, relationships, indexes, and fields all allow a text tag called B<Notes>.
165    The text inside the B<Notes> tag contains comments that will appear when the database
166    documentation is generated. Within a B<Notes> tag, you may use C<[i]> and C<[/i]> for
167    italics, C<[b]> and C<[/b]> for bold, and C<[p]> for a new paragraph.
168    
169    =head3 Fields
170    
171    Both entities and relationships have fields described by B<Field> tags. A B<Field>
172    tag can have B<Notes> associated with it. The complete set of B<Field> tags for an
173    object mus be inside B<Fields> tags.
174    
175        <Entity ... >
176            <Fields>
177                ... Field tags ...
178            </Fields>
179        </Entity>
180    
181    The attributes for the B<Field> tag are as follows.
182    
183    =over 4
184    
185    =item name
186    
187    Name of the field. The field name should contain only letters, digits, and hyphens (C<->),
188    and the first character should be a letter. Most underlying databases are case-insensitive
189    with the respect to field names, so a best practice is to use lower-case letters only.
190    
191    =item type
192    
193    Data type of the field. The legal data types are given above.
194    
195    =item relation
196    
197    Name of the relation containing the field. This should only be specified for entity
198    fields. The ERDB system does not support optional fields or multi-occurring fields
199    in the primary relation of an entity. Instead, they are put into secondary relations.
200    So, for example, in the C<Genome> entity, the C<group-name> field indicates a special
201    grouping used to select a subset of the genomes. A given genome may not be in any
202    groups or may be in multiple groups. Therefore, C<group-name> specifies a relation
203    value. The relation name specified must be a valid table name. By convention, it is
204    usually the entity name followed by a qualifying word (e.g. C<GenomeGroup>). In an
205    entity, the fields without a relation attribute are said to belong to the
206    I<primary relation>. This relation has the same name as the entity itself.
207    
208    =back
209    
210    =head3 Indexes
211    
212    An entity can have multiple alternate indexes associated with it. The fields must
213    be from the primary relation. The alternate indexes assist in ordering results
214    from a query. A relationship can have up to two indexes-- a I<to-index> and a
215    I<from-index>. These order the results when crossing the relationship. For
216    example, in the relationship C<HasContig> from C<Genome> to C<Contig>, the
217    from-index would order the contigs of a ganome, and the to-index would order
218    the genomes of a contig. A relationship's index must specify only fields in
219    the relationship.
220    
221    The indexes for an entity must be listed inside the B<Indexes> tag. The from-index
222    of a relationship is specified using the B<FromIndex> tag; the to-index is specified
223    using the B<ToIndex> tag.
224    
225    Each index can contain a B<Notes> tag. In addition, it will have an B<IndexFields>
226    tag containing the B<IndexField> tags. These specify, in order, the fields used in
227    the index. The attributes of an B<IndexField> tag are as follows.
228    
229    =over 4
230    
231    =item name
232    
233    Name of the field.
234    
235    =item order
236    
237    Sort order of the field-- C<ascending> or C<descending>.
238    
239    =back
240    
241    The B<Index>, B<FromIndex>, and B<ToIndex> tags themselves have no attributes.
242    
243    =head3 Object and Field Names
244    
245    By convention entity and relationship names use capital casing (e.g. C<Genome> or
246    C<HasRegionsIn>. Most underlying databases, however, are aggressively case-insensitive
247    with respect to relation names, converting them internally to all-upper case or
248    all-lower case.
249    
250    If syntax or parsing errors occur when you try to load or use an ERDB database, the
251    most likely reason is that one of your objects has an SQL reserved word as its name.
252    The list of SQL reserved words keeps increasing; however, most are unlikely to show
253    up as a noun or declarative verb phrase. The exceptions are C<Group>, C<User>,
254    C<Table>, C<Index>, C<Object>, C<Date>, C<Number>, C<Update>, C<Time>, C<Percent>,
255    C<Memo>, C<Order>, and C<Sum>. This problem can crop up in field names as well.
256    
257    Every entity has a field called C<id> that acts as its primary key. Every relationship
258    has fields called C<from-link> and C<to-link> that contain copies of the relevant
259    entity IDs. These are essentially ERDB's reserved words, and should not be used
260    for user-defined field names.
261    
262    =head3 Entities
263    
264    An entity is described by the B<Entity> tag. The entity can contain B<Notes>, an
265    B<Indexes> tag containing one or more secondary indexes, and a B<Fields> tag
266    containing one or more fields. The attributes of the B<Entity> tag are as follows.
267    
268    =over 4
269    
270    =item name
271    
272    Name of the entity. The entity name, by convention, uses capital casing (e.g. C<Genome>
273    or C<GroupBlock>) and should be a noun or noun phrase.
274    
275    =item keyType
276    
277    Data type of the primary key. The primary key is always named C<id>.
278    
279    =back
280    
281    =head3 Relationships
282    
283    A relationship is described by the C<Relationship> tag. Within a relationship,
284    there can be a C<Notes> tag, a C<Fields> tag containing the intersection data
285    fields, a C<FromIndex> tag containing the from-index, and a C<ToIndex> tag containing
286    the to-index.
287    
288    The C<Relationship> tag has the following attributes.
289    
290    =over 4
291    
292    =item name
293    
294    Name of the relationship. The relationship name, by convention, uses capital casing
295    (e.g. C<ContainsRegionIn> or C<HasContig>), and should be a declarative verb
296    phrase, designed to fit between the from-entity and the to-entity (e.g.
297    Block C<ContainsRegionIn> Genome).
298    
299    =item from
300    
301    Name of the entity from which the relationship starts.
302    
303    =item to
304    
305    Name of the entity to which the relationship proceeds.
306    
307    =item arity
308    
309    Relationship type: C<1M> for one-to-many and C<MM> for many-to-many.
310    
311    =back
312    
313  =cut  =cut
314    
315  # GLOBALS  # GLOBALS
# Line 77  Line 317 
317  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.
318  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field
319  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation
320   #string is specified in the field definition.  # string is specified in the field definition. "avgLen" is the average byte length for estimating
321  my %TypeTable = ( char =>        { sqlType => 'CHAR(1)',                        maxLen => 1,                    dataGen => "StringGen('A')" },  # record sizes.
322                                    int =>         { sqlType => 'INTEGER',                        maxLen => 20,                   dataGen => "IntGen(0, 99999999)" },  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            avgLen =>   1, dataGen => "StringGen('A')" },
323                                    string =>  { sqlType => 'VARCHAR(255)',               maxLen => 255,                  dataGen => "StringGen(IntGen(10,250))" },                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           avgLen =>   4, dataGen => "IntGen(0, 99999999)" },
324                                    text =>        { sqlType => 'TEXT',                           maxLen => 1000000000,   dataGen => "StringGen(IntGen(80,1000))" },                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          avgLen => 100, dataGen => "StringGen(IntGen(10,250))" },
325                                    date =>        { sqlType => 'BIGINT',                         maxLen => 80,                   dataGen => "DateGen(-7, 7, IntGen(0,1400))" },                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" },
326                                    float =>       { sqlType => 'DOUBLE PRECISION',       maxLen => 40,                   dataGen => "FloatGen(0.0, 100.0)" },                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" },
327                                    boolean => { sqlType => 'SMALLINT',                   maxLen => 1,                    dataGen => "IntGen(0, 1)" },                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, dataGen => "FloatGen(0.0, 100.0)" },
328                      boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   1, dataGen => "IntGen(0, 1)" },
329                     'hash-string' =>
330                                 { sqlType => 'VARCHAR(22)',        maxLen => 22,           avgLen =>  22, dataGen => "SringGen(22)" },
331                     'id-string' =>
332                                 { sqlType => 'VARCHAR(25)',        maxLen => 25,           avgLen =>  25, dataGen => "SringGen(22)" },
333                               'key-string' =>                               'key-string' =>
334                                                           { sqlType => 'VARCHAR(40)',            maxLen => 40,                   dataGen => "StringGen(IntGen(10,40))" },                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, dataGen => "StringGen(IntGen(10,40))" },
335                                   'name-string' =>                                   'name-string' =>
336                                                           { sqlType => 'VARCHAR(80)',            maxLen => 80,                   dataGen => "StringGen(IntGen(10,80))" },                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           avgLen =>  40, dataGen => "StringGen(IntGen(10,80))" },
337                                   'medium-string' =>                                   'medium-string' =>
338                                                           { sqlType => 'VARCHAR(160)',           maxLen => 160,                  dataGen => "StringGen(IntGen(10,160))" },                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          avgLen =>  40, dataGen => "StringGen(IntGen(10,160))" },
339                                  );                                  );
340    
341  # Table translating arities into natural language.  # Table translating arities into natural language.
# Line 140  Line 385 
385                                   _metaData => $metaData                                   _metaData => $metaData
386                             };                             };
387          # Bless and return it.          # Bless and return it.
388          bless $self;      bless $self, $class;
389          return $self;          return $self;
390  }  }
391    
392  =head3 ShowMetaData  =head3 ShowMetaData
393    
394  C<< $database->ShowMetaData($fileName); >>  C<< $erdb->ShowMetaData($fileName); >>
395    
396  This method outputs a description of the database. This description can be used to help users create  This method outputs a description of the database. This description can be used to help users create
397  the data to be loaded into the relations.  the data to be loaded into the relations.
# Line 278  Line 523 
523          print HTMLOUT _OpenTable("Join Table", "Source", "Target", "Join Condition");          print HTMLOUT _OpenTable("Join Table", "Source", "Target", "Join Condition");
524          # Loop through the joins.          # Loop through the joins.
525          my $joinTable = $metadata->{Joins};          my $joinTable = $metadata->{Joins};
526          for my $joinKey (sort keys %{$joinTable}) {      my @joinKeys = keys %{$joinTable};
527        for my $joinKey (sort @joinKeys) {
528                  # Separate out the source, the target, and the join clause.                  # Separate out the source, the target, and the join clause.
529                  $joinKey =~ m!([^/]*)/(.*)$!;          $joinKey =~ m!^([^/]+)/(.+)$!;
530                  my ($source, $target, $clause) = ($self->ComputeObjectSentence($1),          my ($sourceRelation, $targetRelation) = ($1, $2);
531                                                                                    $self->ComputeObjectSentence($2),          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(Joins => 4);
532                                                                                    $joinTable->{$joinKey});          my $source = $self->ComputeObjectSentence($sourceRelation);
533            my $target = $self->ComputeObjectSentence($targetRelation);
534            my $clause = $joinTable->{$joinKey};
535                  # Display them in a table row.                  # Display them in a table row.
536                  print HTMLOUT "<tr><td>$source</td><td>$target</td><td>$clause</td></tr>\n";                  print HTMLOUT "<tr><td>$source</td><td>$target</td><td>$clause</td></tr>\n";
537          }          }
# Line 298  Line 546 
546    
547  =head3 DumpMetaData  =head3 DumpMetaData
548    
549  C<< $database->DumpMetaData(); >>  C<< $erdb->DumpMetaData(); >>
550    
551  Return a dump of the metadata structure.  Return a dump of the metadata structure.
552    
# Line 313  Line 561 
561    
562  =head3 CreateTables  =head3 CreateTables
563    
564  C<< $datanase->CreateTables(); >>  C<< $erdb->CreateTables(); >>
565    
566  This method creates the tables for the database from the metadata structure loaded by the  This method creates the tables for the database from the metadata structure loaded by the
567  constructor. It is expected this function will only be used on rare occasions, when the  constructor. It is expected this function will only be used on rare occasions, when the
# Line 325  Line 573 
573  sub CreateTables {  sub CreateTables {
574          # Get the parameters.          # Get the parameters.
575          my ($self) = @_;          my ($self) = @_;
576          my $metadata = $self->{_metaData};      # Get the relation names.
577          my $dbh = $self->{_dbh};      my @relNames = $self->GetTableNames();
578          # Loop through the entities.      # Loop through the relations.
579          while (my ($entityName, $entityData) = each %{$metadata->{Entities}}) {      for my $relationName (@relNames) {
                 # Tell the user what we're doing.  
                 Trace("Creating relations for entity $entityName.") if T(1);  
                 # Loop through the entity's relations.  
                 for my $relationName (keys %{$entityData->{Relations}}) {  
580                          # Create a table for this relation.                          # Create a table for this relation.
581                          $self->CreateTable($relationName);                          $self->CreateTable($relationName);
582                          Trace("Relation $relationName created.") if T(1);          Trace("Relation $relationName created.") if T(2);
                 }  
         }  
         # Loop through the relationships.  
         my $relationshipTable = $metadata->{Relationships};  
         for my $relationshipName (keys %{$metadata->{Relationships}}) {  
                 # Create a table for this relationship.  
                 Trace("Creating relationship $relationshipName.") if T(1);  
                 $self->CreateTable($relationshipName);  
583          }          }
584  }  }
585    
586  =head3 CreateTable  =head3 CreateTable
587    
588  C<< $database->CreateTable($tableName, $indexFlag); >>  C<< $erdb->CreateTable($tableName, $indexFlag, $estimatedRows); >>
589    
590  Create the table for a relation and optionally create its indexes.  Create the table for a relation and optionally create its indexes.
591    
# Line 359  Line 595 
595    
596  Name of the relation (which will also be the table name).  Name of the relation (which will also be the table name).
597    
598  =item $indexFlag  =item indexFlag
599    
600  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,
601  L</CreateIndexes> must be called later to bring the indexes into existence.  L</CreateIndexes> must be called later to bring the indexes into existence.
602    
603    =item estimatedRows (optional)
604    
605    If specified, the estimated maximum number of rows for the relation. This
606    information allows the creation of tables using storage engines that are
607    faster but require size estimates, such as MyISAM.
608    
609  =back  =back
610    
611  =cut  =cut
612    
613  sub CreateTable {  sub CreateTable {
614          # Get the parameters.          # Get the parameters.
615          my ($self, $relationName, $indexFlag) = @_;      my ($self, $relationName, $indexFlag, $estimatedRows) = @_;
616          # Get the database handle.          # Get the database handle.
617          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
618          # Get the relation data and determine whether or not the relation is primary.          # Get the relation data and determine whether or not the relation is primary.
# Line 394  Line 636 
636          # Insure the table is not already there.          # Insure the table is not already there.
637          $dbh->drop_table(tbl => $relationName);          $dbh->drop_table(tbl => $relationName);
638          Trace("Table $relationName dropped.") if T(2);          Trace("Table $relationName dropped.") if T(2);
639        # If there are estimated rows, create an estimate so we can take advantage of
640        # faster DB technologies.
641        my $estimation = undef;
642        if ($estimatedRows) {
643            $estimation = [$self->EstimateRowSize($relationName), $estimatedRows];
644        }
645          # Create the table.          # Create the table.
646          Trace("Creating table $relationName: $fieldThing") if T(2);          Trace("Creating table $relationName: $fieldThing") if T(2);
647          $dbh->create_table(tbl => $relationName, flds => $fieldThing);      $dbh->create_table(tbl => $relationName, flds => $fieldThing, estimates => $estimation);
648          Trace("Relation $relationName created in database.") if T(2);          Trace("Relation $relationName created in database.") if T(2);
649          # If we want to build the indexes, we do it here.          # If we want to build the indexes, we do it here.
650          if ($indexFlag) {          if ($indexFlag) {
# Line 404  Line 652 
652          }          }
653  }  }
654    
655    =head3 VerifyFields
656    
657    C<< my $count = $erdb->VerifyFields($relName, \@fieldList); >>
658    
659    Run through the list of proposed field values, insuring that all the character fields are
660    below the maximum length. If any fields are too long, they will be truncated in place.
661    
662    =over 4
663    
664    =item relName
665    
666    Name of the relation for which the specified fields are destined.
667    
668    =item fieldList
669    
670    Reference to a list, in order, of the fields to be put into the relation.
671    
672    =item RETURN
673    
674    Returns the number of fields truncated.
675    
676    =back
677    
678    =cut
679    
680    sub VerifyFields {
681        # Get the parameters.
682        my ($self, $relName, $fieldList) = @_;
683        # Initialize the return value.
684        my $retVal = 0;
685        # Get the relation definition.
686        my $relData = $self->_FindRelation($relName);
687        # Get the list of field descriptors.
688        my $fieldTypes = $relData->{Fields};
689        my $fieldCount = scalar @{$fieldTypes};
690        # Loop through the two lists.
691        for (my $i = 0; $i < $fieldCount; $i++) {
692            # Get the type of the current field.
693            my $fieldType = $fieldTypes->[$i]->{type};
694            # If it's a character field, verify the length.
695            if ($fieldType =~ /string/) {
696                my $maxLen = $TypeTable{$fieldType}->{maxLen};
697                my $oldString = $fieldList->[$i];
698                if (length($oldString) > $maxLen) {
699                    # Here it's too big, so we truncate it.
700                    Trace("Truncating field $i in relation $relName to $maxLen characters from \"$oldString\".") if T(1);
701                    $fieldList->[$i] = substr $oldString, 0, $maxLen;
702                    $retVal++;
703                }
704            }
705        }
706        # Return the truncation count.
707        return $retVal;
708    }
709    
710    =head3 DigestFields
711    
712    C<< $erdb->DigestFields($relName, $fieldList); >>
713    
714    Digest the strings in the field list that correspond to data type C<hash-string> in the
715    specified relation.
716    
717    =over 4
718    
719    =item relName
720    
721    Name of the relation to which the fields belong.
722    
723    =item fieldList
724    
725    List of field contents to be loaded into the relation.
726    
727    =back
728    
729    =cut
730    #: Return Type ;
731    sub DigestFields {
732        # Get the parameters.
733        my ($self, $relName, $fieldList) = @_;
734        # Get the relation definition.
735        my $relData = $self->_FindRelation($relName);
736        # Get the list of field descriptors.
737        my $fieldTypes = $relData->{Fields};
738        my $fieldCount = scalar @{$fieldTypes};
739        # Loop through the two lists.
740        for (my $i = 0; $i < $fieldCount; $i++) {
741            # Get the type of the current field.
742            my $fieldType = $fieldTypes->[$i]->{type};
743            # If it's a hash string, digest it in place.
744            if ($fieldType eq 'hash-string') {
745                $fieldList->[$i] = md5_base64($fieldList->[$i]);
746            }
747        }
748    }
749    
750  =head3 CreateIndex  =head3 CreateIndex
751    
752  C<< $database->CreateIndex($relationName); >>  C<< $erdb->CreateIndex($relationName); >>
753    
754  Create the indexes for a relation. If a table is being loaded from a large source file (as  Create the indexes for a relation. If a table is being loaded from a large source file (as
755  is the case in L</LoadTable>), it is best to create the indexes after the load. If that is  is the case in L</LoadTable>), it is sometimes best to create the indexes after the load.
756  the case, then L</CreateTable> should be called with the index flag set to FALSE, and this  If that is the case, then L</CreateTable> should be called with the index flag set to
757  method used after the load to create the indexes for the table.  FALSE, and this method used after the load to create the indexes for the table.
758    
759  =cut  =cut
760    
# Line 423  Line 766 
766          # Get the database handle.          # Get the database handle.
767          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
768          # Now we need to create this relation's indexes. We do this by looping through its index table.          # Now we need to create this relation's indexes. We do this by looping through its index table.
769          while (my ($indexName, $indexData) = each %{$relationData->{Indexes}}) {      my $indexHash = $relationData->{Indexes};
770        for my $indexName (keys %{$indexHash}) {
771            my $indexData = $indexHash->{$indexName};
772                  # Get the index's field list.                  # Get the index's field list.
773                  my @fieldList = _FixNames(@{$indexData->{IndexFields}});                  my @fieldList = _FixNames(@{$indexData->{IndexFields}});
774                  my $flds = join(', ', @fieldList);                  my $flds = join(', ', @fieldList);
775                  # Get the index's uniqueness flag.                  # Get the index's uniqueness flag.
776                  my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');                  my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');
777                  # Create the index.                  # Create the index.
778                  $dbh->create_index(idx => $indexName, tbl => $relationName, flds => $flds, unique => $unique);          my $rv = $dbh->create_index(idx => $indexName, tbl => $relationName,
779                                        flds => $flds, unique => $unique);
780            if ($rv) {
781                  Trace("Index created: $indexName for $relationName ($flds)") if T(1);                  Trace("Index created: $indexName for $relationName ($flds)") if T(1);
782            } else {
783                Confess("Error creating index $indexName for $relationName using ($flds): " . $dbh->error_message());
784            }
785          }          }
786  }  }
787    
788  =head3 LoadTables  =head3 LoadTables
789    
790  C<< my $stats = $database->LoadTables($directoryName, $rebuild); >>  C<< my $stats = $erdb->LoadTables($directoryName, $rebuild); >>
791    
792  This method will load the database tables from a directory. The tables must already have been created  This method will load the database tables from a directory. The tables must already have been created
793  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;
# Line 480  Line 830 
830          $directoryName =~ s!/\\$!!;          $directoryName =~ s!/\\$!!;
831          # Declare the return variable.          # Declare the return variable.
832          my $retVal = Stats->new();          my $retVal = Stats->new();
833          # Get the metadata structure.      # Get the relation names.
834          my $metaData = $self->{_metaData};      my @relNames = $self->GetTableNames();
835          # Loop through the entities.      for my $relationName (@relNames) {
         for my $entity (values %{$metaData->{Entities}}) {  
                 # Loop through the entity's relations.  
                 for my $relationName (keys %{$entity->{Relations}}) {  
836                          # Try to load this relation.                          # Try to load this relation.
837                          my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);                          my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);
838                          # Accumulate the statistics.                          # Accumulate the statistics.
839                          $retVal->Accumulate($result);                          $retVal->Accumulate($result);
840                  }                  }
         }  
         # Loop through the relationships.  
         for my $relationshipName (keys %{$metaData->{Relationships}}) {  
                 # Try to load this relationship's relation.  
                 my $result = $self->_LoadRelation($directoryName, $relationshipName, $rebuild);  
                 # Accumulate the statistics.  
                 $retVal->Accumulate($result);  
         }  
841          # Add the duration of the load to the statistical object.          # Add the duration of the load to the statistical object.
842          $retVal->Add('duration', gettimeofday - $startTime);          $retVal->Add('duration', gettimeofday - $startTime);
843          # Return the accumulated statistics.          # Return the accumulated statistics.
844          return $retVal;          return $retVal;
845  }  }
846    
847    
848  =head3 GetTableNames  =head3 GetTableNames
849    
850  C<< my @names = $database->GetTableNames; >>  C<< my @names = $erdb->GetTableNames; >>
851    
852  Return a list of the relations required to implement this database.  Return a list of the relations required to implement this database.
853    
# Line 524  Line 864 
864    
865  =head3 GetEntityTypes  =head3 GetEntityTypes
866    
867  C<< my @names = $database->GetEntityTypes; >>  C<< my @names = $erdb->GetEntityTypes; >>
868    
869  Return a list of the entity type names.  Return a list of the entity type names.
870    
# Line 539  Line 879 
879          return sort keys %{$entityList};          return sort keys %{$entityList};
880  }  }
881    
882    =head3 IsEntity
883    
884    C<< my $flag = $erdb->IsEntity($entityName); >>
885    
886    Return TRUE if the parameter is an entity name, else FALSE.
887    
888    =over 4
889    
890    =item entityName
891    
892    Object name to be tested.
893    
894    =item RETURN
895    
896    Returns TRUE if the specified string is an entity name, else FALSE.
897    
898    =back
899    
900    =cut
901    
902    sub IsEntity {
903        # Get the parameters.
904        my ($self, $entityName) = @_;
905        # Test to see if it's an entity.
906        return exists $self->{_metaData}->{Entities}->{$entityName};
907    }
908    
909  =head3 Get  =head3 Get
910    
911  C<< my $query = $database->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my $query = $erdb->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
912    
913  This method returns a query object for entities of a specified type using a specified filter.  This method returns a query object for entities of a specified type using a specified filter.
914  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each
# Line 549  Line 916 
916  following call requests all B<Genome> objects for the genus specified in the variable  following call requests all B<Genome> objects for the genus specified in the variable
917  $genus.  $genus.
918    
919  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", $genus); >>
920    
921  The WHERE clause contains a single question mark, so there is a single additional  The WHERE clause contains a single question mark, so there is a single additional
922  parameter representing the parameter value. It would also be possible to code  parameter representing the parameter value. It would also be possible to code
923    
924  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>
925    
926  however, this version of the call would generate a syntax error if there were any quote  however, this version of the call would generate a syntax error if there were any quote
927  characters inside the variable C<$genus>.  characters inside the variable C<$genus>.
# Line 566  Line 933 
933  It is possible to specify multiple entity and relationship names in order to retrieve more than  It is possible to specify multiple entity and relationship names in order to retrieve more than
934  one object's data at the same time, which allows highly complex joined queries. For example,  one object's data at the same time, which allows highly complex joined queries. For example,
935    
936  C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>
937    
938  If multiple names are specified, then the query processor will automatically determine a  If multiple names are specified, then the query processor will automatically determine a
939  join path between the entities and relationships. The algorithm used is very simplistic.  join path between the entities and relationships. The algorithm used is very simplistic.
940  In particular, you can't specify any entity or relationship more than once, and if a  In particular, if a relationship is recursive, the path is determined by the order in which
941  relationship is recursive, the path is determined by the order in which the entity  the entity and the relationship appear. For example, consider a recursive relationship
942  and the relationship appear. For example, consider a recursive relationship B<IsParentOf>  B<IsParentOf> which relates B<People> objects to other B<People> objects. If the join path is
 which relates B<People> objects to other B<People> objects. If the join path is  
943  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,
944  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.
945    
946    If an entity or relationship is mentioned twice, the name for the second occurrence will
947    be suffixed with C<2>, the third occurrence will be suffixed with C<3>, and so forth. So,
948    for example, if we have C<['Feature', 'HasContig', 'Contig', 'HasContig']>, then the
949    B<to-link> field of the first B<HasContig> is specified as C<HasContig(to-link)>, while
950    the B<to-link> field of the second B<HasContig> is specified as C<HasContig2(to-link)>.
951    
952  =over 4  =over 4
953    
954  =item objectNames  =item objectNames
# Line 599  Line 971 
971    
972  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>
973    
974    Note that the case is important. Only an uppercase "ORDER BY" with a single space will
975    be processed. The idea is to make it less likely to find the verb by accident.
976    
977  The rules for field references in a sort order are the same as those for field references in the  The rules for field references in a sort order are the same as those for field references in the
978  filter clause in general; however, odd things may happen if a sort field is from a secondary  filter clause in general; however, odd things may happen if a sort field is from a secondary
979  relation.  relation.
980    
981    Finally, you can limit the number of rows returned by adding a LIMIT clause. The LIMIT must
982    be the last thing in the filter clause, and it contains only the word "LIMIT" followed by
983    a positive number. So, for example
984    
985    C<< "Genome(genus) = ? ORDER BY Genome(species) LIMIT 10" >>
986    
987    will only return the first ten genomes for the specified genus. The ORDER BY clause is not
988    required. For example, to just get the first 10 genomes in the B<Genome> table, you could
989    use
990    
991    C<< "LIMIT 10" >>
992    
993  =item param1, param2, ..., paramN  =item param1, param2, ..., paramN
994    
995  Parameter values to be substituted into the filter clause.  Parameter values to be substituted into the filter clause.
# Line 618  Line 1005 
1005  sub Get {  sub Get {
1006          # Get the parameters.          # Get the parameters.
1007          my ($self, $objectNames, $filterClause, @params) = @_;          my ($self, $objectNames, $filterClause, @params) = @_;
1008        # Adjust the list of object names to account for multiple occurrences of the
1009        # same object. We start with a hash table keyed on object name that will
1010        # return the object suffix. The first time an object is encountered it will
1011        # not be found in the hash. The next time the hash will map the object name
1012        # to 2, then 3, and so forth.
1013        my %objectHash = ();
1014        # This list will contain the object names as they are to appear in the
1015        # FROM list.
1016        my @fromList = ();
1017        # This list contains the suffixed object name for each object. It is exactly
1018        # parallel to the list in the $objectNames parameter.
1019        my @mappedNameList = ();
1020        # Finally, this hash translates from a mapped name to its original object name.
1021        my %mappedNameHash = ();
1022        # Now we create the lists. Note that for every single name we push something into
1023        # @fromList and @mappedNameList. This insures that those two arrays are exactly
1024        # parallel to $objectNames.
1025        for my $objectName (@{$objectNames}) {
1026            # Get the next suffix for this object.
1027            my $suffix = $objectHash{$objectName};
1028            if (! $suffix) {
1029                # Here we are seeing the object for the first time. The object name
1030                # is used as is.
1031                push @mappedNameList, $objectName;
1032                push @fromList, $objectName;
1033                $mappedNameHash{$objectName} = $objectName;
1034                # Denote the next suffix will be 2.
1035                $objectHash{$objectName} = 2;
1036            } else {
1037                # Here we've seen the object before. We construct a new name using
1038                # the suffix from the hash and update the hash.
1039                my $mappedName = "$objectName$suffix";
1040                $objectHash{$objectName} = $suffix + 1;
1041                # The FROM list has the object name followed by the mapped name. This
1042                # tells SQL it's still the same table, but we're using a different name
1043                # for it to avoid confusion.
1044                push @fromList, "$objectName $mappedName";
1045                # The mapped-name list contains the real mapped name.
1046                push @mappedNameList, $mappedName;
1047                # Finally, enable us to get back from the mapped name to the object name.
1048                $mappedNameHash{$mappedName} = $objectName;
1049            }
1050        }
1051          # Construct the SELECT statement. The general pattern is          # Construct the SELECT statement. The general pattern is
1052          #          #
1053          # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN          # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN
1054          #          #
1055          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
1056          my $command = "SELECT DISTINCT " . join('.*, ', @{$objectNames}) . ".* FROM " .      my $command = "SELECT DISTINCT " . join('.*, ', @mappedNameList) . ".* FROM " .
1057                                  join(', ', @{$objectNames});                  join(', ', @fromList);
1058          # Check for a filter clause.          # Check for a filter clause.
1059          if ($filterClause) {          if ($filterClause) {
1060                  # Here we have one, so we convert its field names and add it to the query. First,                  # Here we have one, so we convert its field names and add it to the query. First,
# Line 632  Line 1062 
1062                  my $filterString = $filterClause;                  my $filterString = $filterClause;
1063                  # Next, we sort the object names by length. This helps protect us from finding                  # Next, we sort the object names by length. This helps protect us from finding
1064                  # object names inside other object names when we're doing our search and replace.                  # object names inside other object names when we're doing our search and replace.
1065                  my @sortedNames = sort { length($b) - length($a) } @{$objectNames};          my @sortedNames = sort { length($b) - length($a) } @mappedNameList;
1066                  # We will also keep a list of conditions to add to the WHERE clause in order to link                  # We will also keep a list of conditions to add to the WHERE clause in order to link
1067                  # entities and relationships as well as primary relations to secondary ones.                  # entities and relationships as well as primary relations to secondary ones.
1068                  my @joinWhere = ();                  my @joinWhere = ();
1069                  # The final preparatory step is to create a hash table of relation names. The                  # The final preparatory step is to create a hash table of relation names. The
1070                  # table begins with the relation names already in the SELECT command.          # table begins with the relation names already in the SELECT command. We may
1071                  my %fromNames = ();          # need to add relations later if there is filtering on a field in a secondary
1072                  for my $objectName (@sortedNames) {          # relation. The secondary relations are the ones that contain multiply-
1073                          $fromNames{$objectName} = 1;          # occurring or optional fields.
1074                  }          my %fromNames = map { $_ => 1 } @sortedNames;
1075                  # We are ready to begin. We loop through the object names, replacing each                  # We are ready to begin. We loop through the object names, replacing each
1076                  # object name's field references by the corresponding SQL field reference.                  # object name's field references by the corresponding SQL field reference.
1077                  # Along the way, if we find a secondary relation, we will need to add it                  # Along the way, if we find a secondary relation, we will need to add it
1078                  # to the FROM clause.                  # to the FROM clause.
1079                  for my $objectName (@sortedNames) {          for my $mappedName (@sortedNames) {
1080                          # Get the length of the object name plus 2. This is the value we add to the                          # Get the length of the object name plus 2. This is the value we add to the
1081                          # size of the field name to determine the size of the field reference as a                          # size of the field name to determine the size of the field reference as a
1082                          # whole.                          # whole.
1083                          my $nameLength = 2 + length $objectName;              my $nameLength = 2 + length $mappedName;
1084                # Get the real object name for this mapped name.
1085                my $objectName = $mappedNameHash{$mappedName};
1086                Trace("Processing $mappedName for object $objectName.") if T(4);
1087                          # Get the object's field list.                          # Get the object's field list.
1088                          my $fieldList = $self->_GetFieldTable($objectName);              my $fieldList = $self->GetFieldTable($objectName);
1089                          # Find the field references for this object.                          # Find the field references for this object.
1090                          while ($filterString =~ m/$objectName\(([^)]*)\)/g) {              while ($filterString =~ m/$mappedName\(([^)]*)\)/g) {
1091                                  # At this point, $1 contains the field name, and the current position                                  # At this point, $1 contains the field name, and the current position
1092                                  # is set immediately after the final parenthesis. We pull out the name of                                  # is set immediately after the final parenthesis. We pull out the name of
1093                                  # the field and the position and length of the field reference as a whole.                                  # the field and the position and length of the field reference as a whole.
# Line 665  Line 1098 
1098                                  if (!exists $fieldList->{$fieldName}) {                                  if (!exists $fieldList->{$fieldName}) {
1099                                          Confess("Field $fieldName not found for object $objectName.");                                          Confess("Field $fieldName not found for object $objectName.");
1100                                  } else {                                  } else {
1101                        Trace("Processing $fieldName at position $pos.") if T(4);
1102                                          # Get the field's relation.                                          # Get the field's relation.
1103                                          my $relationName = $fieldList->{$fieldName}->{relation};                                          my $relationName = $fieldList->{$fieldName}->{relation};
1104                        # Now we have a secondary relation. We need to insure it matches the
1105                        # mapped name of the primary relation. First we peel off the suffix
1106                        # from the mapped name.
1107                        my $mappingSuffix = substr $mappedName, length($objectName);
1108                        # Put the mapping suffix onto the relation name to get the
1109                        # mapped relation name.
1110                        my $mappedRelationName = "$relationName$mappingSuffix";
1111                                          # Insure the relation is in the FROM clause.                                          # Insure the relation is in the FROM clause.
1112                                          if (!exists $fromNames{$relationName}) {                      if (!exists $fromNames{$mappedRelationName}) {
1113                                                  # Add the relation to the FROM clause.                                                  # Add the relation to the FROM clause.
1114                            if ($mappedRelationName eq $relationName) {
1115                                # The name is un-mapped, so we add it without
1116                                # any frills.
1117                                                  $command .= ", $relationName";                                                  $command .= ", $relationName";
                                                 # Create its join sub-clause.  
1118                                                  push @joinWhere, "$objectName.id = $relationName.id";                                                  push @joinWhere, "$objectName.id = $relationName.id";
1119                                                  # Denote we have it available for future fields.                          } else {
1120                                                  $fromNames{$relationName} = 1;                              # Here we have a mapping situation.
1121                                $command .= ", $relationName $mappedRelationName";
1122                                push @joinWhere, "$mappedRelationName.id = $mappedName.id";
1123                            }
1124                            # Denote we have this relation available for future fields.
1125                            $fromNames{$mappedRelationName} = 1;
1126                                          }                                          }
1127                                          # Form an SQL field reference from the relation name and the field name.                                          # Form an SQL field reference from the relation name and the field name.
1128                                          my $sqlReference = "$relationName." . _FixName($fieldName);                      my $sqlReference = "$mappedRelationName." . _FixName($fieldName);
1129                                          # Put it into the filter string in place of the old value.                                          # Put it into the filter string in place of the old value.
1130                                          substr($filterString, $pos, $len) = $sqlReference;                                          substr($filterString, $pos, $len) = $sqlReference;
1131                                          # Reposition the search.                                          # Reposition the search.
# Line 689  Line 1137 
1137                  # is more than one object in the object list. We start with the first object and                  # is more than one object in the object list. We start with the first object and
1138                  # run through the objects after it. Note also that we make a safety copy of the                  # run through the objects after it. Note also that we make a safety copy of the
1139                  # list before running through it.                  # list before running through it.
1140                  my @objectList = @{$objectNames};          my @mappedObjectList = @mappedNameList;
1141                  my $lastObject = shift @objectList;          my $lastMappedObject = shift @mappedObjectList;
1142                  # Get the join table.                  # Get the join table.
1143                  my $joinTable = $self->{_metaData}->{Joins};                  my $joinTable = $self->{_metaData}->{Joins};
1144                  # Loop through the object list.                  # Loop through the object list.
1145                  for my $thisObject (@objectList) {          for my $thisMappedObject (@mappedObjectList) {
1146                          # Look for a join.              # Look for a join using the real object names.
1147                my $lastObject = $mappedNameHash{$lastMappedObject};
1148                my $thisObject = $mappedNameHash{$thisMappedObject};
1149                          my $joinKey = "$lastObject/$thisObject";                          my $joinKey = "$lastObject/$thisObject";
1150                          if (!exists $joinTable->{$joinKey}) {                          if (!exists $joinTable->{$joinKey}) {
1151                                  # Here there's no join, so we throw an error.                                  # Here there's no join, so we throw an error.
1152                                  Confess("No join exists to connect from $lastObject to $thisObject.");                  Confess("No join exists to connect from $lastMappedObject to $thisMappedObject.");
1153                          } else {                          } else {
1154                                  # Get the join clause and add it to the WHERE list.                  # Get the join clause.
1155                                  push @joinWhere, $joinTable->{$joinKey};                  my $unMappedJoin = $joinTable->{$joinKey};
1156                    # Fix the names.
1157                    $unMappedJoin =~ s/$lastObject/$lastMappedObject/;
1158                    $unMappedJoin =~ s/$thisObject/$thisMappedObject/;
1159                    push @joinWhere, $unMappedJoin;
1160                                  # Save this object as the last object for the next iteration.                                  # Save this object as the last object for the next iteration.
1161                                  $lastObject = $thisObject;                  $lastMappedObject = $thisMappedObject;
1162                          }                          }
1163                  }                  }
1164                  # Now we need to handle the whole ORDER BY thing. We'll put the order by clause          # Now we need to handle the whole ORDER BY / LIMIT thing. The important part
1165                  # in the following variable.          # here is we want the filter clause to be empty if there's no WHERE filter.
1166            # We'll put the ORDER BY / LIMIT clauses in the following variable.
1167                  my $orderClause = "";                  my $orderClause = "";
1168                  # Locate the ORDER BY verb (if any).          # Locate the ORDER BY or LIMIT verbs (if any). We use a non-greedy
1169                  if ($filterString =~ m/^(.*)ORDER BY/g) {          # operator so that we find the first occurrence of either verb.
1170                          # Here we have an ORDER BY verb. Split it off of the filter string.          if ($filterString =~ m/^(.*?)\s*(ORDER BY|LIMIT)/g) {
1171                # Here we have an ORDER BY or LIMIT verb. Split it off of the filter string.
1172                          my $pos = pos $filterString;                          my $pos = pos $filterString;
1173                          $orderClause = substr($filterString, $pos);              $orderClause = $2 . substr($filterString, $pos);
1174                          $filterString = $1;                          $filterString = $1;
1175                  }                  }
1176                  # Add the filter and the join clauses (if any) to the SELECT command.                  # Add the filter and the join clauses (if any) to the SELECT command.
1177                  if ($filterString) {                  if ($filterString) {
1178                Trace("Filter string is \"$filterString\".") if T(4);
1179                          push @joinWhere, "($filterString)";                          push @joinWhere, "($filterString)";
1180                  }                  }
1181                  if (@joinWhere) {                  if (@joinWhere) {
1182                          $command .= " WHERE " . join(' AND ', @joinWhere);                          $command .= " WHERE " . join(' AND ', @joinWhere);
1183                  }                  }
1184                  # Add the sort clause (if any) to the SELECT command.          # Add the sort or limit clause (if any) to the SELECT command.
1185                  if ($orderClause) {                  if ($orderClause) {
1186                          $command .= " ORDER BY $orderClause";              $command .= " $orderClause";
1187                  }                  }
1188          }          }
1189          Trace("SQL query: $command") if T(2);      Trace("SQL query: $command") if T(SQL => 3);
1190          Trace("PARMS: '" . (join "', '", @params) . "'") if (T(3) && (@params > 0));      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(SQL => 4) && (@params > 0));
1191          my $sth = $dbh->prepare_command($command);          my $sth = $dbh->prepare_command($command);
1192          # Execute it with the parameters bound in.          # Execute it with the parameters bound in.
1193          $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());          $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());
1194        # Now we create the relation map, which enables DBQuery to determine the order, name
1195        # and mapped name for each object in the query.
1196        my @relationMap = ();
1197        for my $mappedName (@mappedNameList) {
1198            push @relationMap, [$mappedName, $mappedNameHash{$mappedName}];
1199        }
1200          # Return the statement object.          # Return the statement object.
1201          my $retVal = DBQuery::_new($self, $sth, @{$objectNames});      my $retVal = DBQuery::_new($self, $sth, \@relationMap);
1202          return $retVal;          return $retVal;
1203  }  }
1204    
1205    =head3 Delete
1206    
1207    C<< my $stats = $erdb->Delete($entityName, $objectID); >>
1208    
1209    Delete an entity instance from the database. The instance is deleted along with all entity and
1210    relationship instances dependent on it. The idea of dependence here is recursive. An object is
1211    always dependent on itself. An object is dependent if it is a 1-to-many or many-to-many
1212    relationship connected to a dependent entity or the "to" entity connected to a 1-to-many
1213    dependent relationship.
1214    
1215    =over 4
1216    
1217    =item entityName
1218    
1219    Name of the entity type for the instance being deleted.
1220    
1221    =item objectID
1222    
1223    ID of the entity instance to be deleted. If the ID contains a wild card character (C<%>),
1224    then it is presumed to by a LIKE pattern.
1225    
1226    =item testFlag
1227    
1228    If TRUE, the delete statements will be traced without being executed.
1229    
1230    =item RETURN
1231    
1232    Returns a statistics object indicating how many records of each particular table were
1233    deleted.
1234    
1235    =back
1236    
1237    =cut
1238    #: Return Type $%;
1239    sub Delete {
1240        # Get the parameters.
1241        my ($self, $entityName, $objectID, $testFlag) = @_;
1242        # Declare the return variable.
1243        my $retVal = Stats->new();
1244        # Get the DBKernel object.
1245        my $db = $self->{_dbh};
1246        # We're going to generate all the paths branching out from the starting entity. One of
1247        # the things we have to be careful about is preventing loops. We'll use a hash to
1248        # determine if we've hit a loop.
1249        my %alreadyFound = ();
1250        # These next lists will serve as our result stack. We start by pushing object lists onto
1251        # the stack, and then popping them off to do the deletes. This means the deletes will
1252        # start with the longer paths before getting to the shorter ones. That, in turn, makes
1253        # sure we don't delete records that might be needed to forge relationships back to the
1254        # original item. We have two lists-- one for TO-relationships, and one for
1255        # FROM-relationships and entities.
1256        my @fromPathList = ();
1257        my @toPathList = ();
1258        # This final hash is used to remember what work still needs to be done. We push paths
1259        # onto the list, then pop them off to extend the paths. We prime it with the starting
1260        # point. Note that we will work hard to insure that the last item on a path in the
1261        # TODO list is always an entity.
1262        my @todoList = ([$entityName]);
1263        while (@todoList) {
1264            # Get the current path.
1265            my $current = pop @todoList;
1266            # Copy it into a list.
1267            my @stackedPath = @{$current};
1268            # Pull off the last item on the path. It will always be an entity.
1269            my $entityName = pop @stackedPath;
1270            # Add it to the alreadyFound list.
1271            $alreadyFound{$entityName} = 1;
1272            # Get the entity data.
1273            my $entityData = $self->_GetStructure($entityName);
1274            # The first task is to loop through the entity's relation. A DELETE command will
1275            # be needed for each of them.
1276            my $relations = $entityData->{Relations};
1277            for my $relation (keys %{$relations}) {
1278                my @augmentedList = (@stackedPath, $relation);
1279                push @fromPathList, \@augmentedList;
1280            }
1281            # Now we need to look for relationships connected to this entity.
1282            my $relationshipList = $self->{_metaData}->{Relationships};
1283            for my $relationshipName (keys %{$relationshipList}) {
1284                my $relationship = $relationshipList->{$relationshipName};
1285                # Check the FROM field. We're only interested if it's us.
1286                if ($relationship->{from} eq $entityName) {
1287                    # Add the path to this relationship.
1288                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1289                    push @fromPathList, \@augmentedList;
1290                    # Check the arity. If it's MM we're done. If it's 1M
1291                    # and the target hasn't been seen yet, we want to
1292                    # stack the entity for future processing.
1293                    if ($relationship->{arity} eq '1M') {
1294                        my $toEntity = $relationship->{to};
1295                        if (! exists $alreadyFound{$toEntity}) {
1296                            # Here we have a new entity that's dependent on
1297                            # the current entity, so we need to stack it.
1298                            my @stackList = (@augmentedList, $toEntity);
1299                            push @fromPathList, \@stackList;
1300                        } else {
1301                            Trace("$toEntity ignored because it occurred previously.") if T(4);
1302                        }
1303                    }
1304                }
1305                # Now check the TO field. In this case only the relationship needs
1306                # deletion.
1307                if ($relationship->{to} eq $entityName) {
1308                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1309                    push @toPathList, \@augmentedList;
1310                }
1311            }
1312        }
1313        # Create the first qualifier for the WHERE clause. This selects the
1314        # keys of the primary entity records to be deleted. When we're deleting
1315        # from a dependent table, we construct a join page from the first qualifier
1316        # to the table containing the dependent records to delete.
1317        my $qualifier = ($objectID =~ /%/ ? "LIKE ?" : "= ?");
1318        # We need to make two passes. The first is through the to-list, and
1319        # the second through the from-list. The from-list is second because
1320        # the to-list may need to pass through some of the entities the
1321        # from-list would delete.
1322        my %stackList = ( from_link => \@fromPathList, to_link => \@toPathList );
1323        # Now it's time to do the deletes. We do it in two passes.
1324        for my $keyName ('to_link', 'from_link') {
1325            # Get the list for this key.
1326            my @pathList = @{$stackList{$keyName}};
1327            Trace(scalar(@pathList) . " entries in path list for $keyName.") if T(3);
1328            # Loop through this list.
1329            while (my $path = pop @pathList) {
1330                # Get the table whose rows are to be deleted.
1331                my @pathTables = @{$path};
1332                # Start the DELETE statement. We need to call DBKernel because the
1333                # syntax of a DELETE-USING varies among DBMSs.
1334                my $target = $pathTables[$#pathTables];
1335                my $stmt = $db->SetUsing(@pathTables);
1336                # Now start the WHERE. The first thing is the ID field from the starting table. That
1337                # starting table will either be the entity relation or one of the entity's
1338                # sub-relations.
1339                $stmt .= " WHERE $pathTables[0].id $qualifier";
1340                # Now we run through the remaining entities in the path, connecting them up.
1341                for (my $i = 1; $i <= $#pathTables; $i += 2) {
1342                    # Connect the current relationship to the preceding entity.
1343                    my ($entity, $rel) = @pathTables[$i-1,$i];
1344                    # The style of connection depends on the direction of the relationship.
1345                    $stmt .= " AND $entity.id = $rel.$keyName";
1346                    if ($i + 1 <= $#pathTables) {
1347                        # Here there's a next entity, so connect that to the relationship's
1348                        # to-link.
1349                        my $entity2 = $pathTables[$i+1];
1350                        $stmt .= " AND $rel.to_link = $entity2.id";
1351                    }
1352                }
1353                # Now we have our desired DELETE statement.
1354                if ($testFlag) {
1355                    # Here the user wants to trace without executing.
1356                    Trace($stmt) if T(0);
1357                } else {
1358                    # Here we can delete. Note that the SQL method dies with a confessing
1359                    # if an error occurs, so we just go ahead and do it.
1360                    Trace("Executing delete from $target using '$objectID'.") if T(3);
1361                    my $rv = $db->SQL($stmt, 0, $objectID);
1362                    # Accumulate the statistics for this delete. The only rows deleted
1363                    # are from the target table, so we use its name to record the
1364                    # statistic.
1365                    $retVal->Add($target, $rv);
1366                }
1367            }
1368        }
1369        # Return the result.
1370        return $retVal;
1371    }
1372    
1373    =head3 GetList
1374    
1375    C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
1376    
1377    Return a list of object descriptors for the specified objects as determined by the
1378    specified filter clause.
1379    
1380    This method is essentially the same as L</Get> except it returns a list of objects rather
1381    than a query object that can be used to get the results one record at a time.
1382    
1383    =over 4
1384    
1385    =item objectNames
1386    
1387    List containing the names of the entity and relationship objects to be retrieved.
1388    
1389    =item filterClause
1390    
1391    WHERE clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can
1392    be parameterized with parameter markers (C<?>). Each field used in the WHERE clause must be
1393    specified in the standard form B<I<objectName>(I<fieldName>)>. Any parameters specified
1394    in the filter clause should be added to the parameter list as additional parameters. The
1395    fields in a filter clause can come from primary entity relations, relationship relations,
1396    or secondary entity relations; however, all of the entities and relationships involved must
1397    be included in the list of object names.
1398    
1399    The filter clause can also specify a sort order. To do this, simply follow the filter string
1400    with an ORDER BY clause. For example, the following filter string gets all genomes for a
1401    particular genus and sorts them by species name.
1402    
1403    C<< "Genome(genus) = ? ORDER BY Genome(species)" >>
1404    
1405    The rules for field references in a sort order are the same as those for field references in the
1406    filter clause in general; however, odd things may happen if a sort field is from a secondary
1407    relation.
1408    
1409    =item param1, param2, ..., paramN
1410    
1411    Parameter values to be substituted into the filter clause.
1412    
1413    =item RETURN
1414    
1415    Returns a list of B<DBObject>s that satisfy the query conditions.
1416    
1417    =back
1418    
1419    =cut
1420    #: Return Type @%
1421    sub GetList {
1422        # Get the parameters.
1423        my ($self, $objectNames, $filterClause, @params) = @_;
1424        # Declare the return variable.
1425        my @retVal = ();
1426        # Perform the query.
1427        my $query = $self->Get($objectNames, $filterClause, @params);
1428        # Loop through the results.
1429        while (my $object = $query->Fetch) {
1430            push @retVal, $object;
1431        }
1432        # Return the result.
1433        return @retVal;
1434    }
1435    
1436  =head3 ComputeObjectSentence  =head3 ComputeObjectSentence
1437    
1438  C<< my $sentence = $database->ComputeObjectSentence($objectName); >>  C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >>
1439    
1440  Check an object name, and if it is a relationship convert it to a relationship sentence.  Check an object name, and if it is a relationship convert it to a relationship sentence.
1441    
# Line 776  Line 1470 
1470    
1471  =head3 DumpRelations  =head3 DumpRelations
1472    
1473  C<< $database->DumpRelations($outputDirectory); >>  C<< $erdb->DumpRelations($outputDirectory); >>
1474    
1475  Write the contents of all the relations to tab-delimited files in the specified directory.  Write the contents of all the relations to tab-delimited files in the specified directory.
1476  Each file will have the same name as the relation dumped, with an extension of DTX.  Each file will have the same name as the relation dumped, with an extension of DTX.
# Line 797  Line 1491 
1491          # Now we need to run through all the relations. First, we loop through the entities.          # Now we need to run through all the relations. First, we loop through the entities.
1492          my $metaData = $self->{_metaData};          my $metaData = $self->{_metaData};
1493          my $entities = $metaData->{Entities};          my $entities = $metaData->{Entities};
1494          while (my ($entityName, $entityStructure) = each %{$entities}) {      for my $entityName (keys %{$entities}) {
1495            my $entityStructure = $entities->{$entityName};
1496                  # Get the entity's relations.                  # Get the entity's relations.
1497                  my $relationList = $entityStructure->{Relations};                  my $relationList = $entityStructure->{Relations};
1498                  # Loop through the relations, dumping them.                  # Loop through the relations, dumping them.
1499                  while (my ($relationName, $relation) = each %{$relationList}) {          for my $relationName (keys %{$relationList}) {
1500                my $relation = $relationList->{$relationName};
1501                          $self->_DumpRelation($outputDirectory, $relationName, $relation);                          $self->_DumpRelation($outputDirectory, $relationName, $relation);
1502                  }                  }
1503          }          }
1504          # Next, we loop through the relationships.          # Next, we loop through the relationships.
1505          my $relationships = $metaData->{Relationships};          my $relationships = $metaData->{Relationships};
1506          while (my ($relationshipName, $relationshipStructure) = each %{$relationships}) {      for my $relationshipName (keys %{$relationships}) {
1507            my $relationshipStructure = $relationships->{$relationshipName};
1508                  # Dump this relationship's relation.                  # Dump this relationship's relation.
1509                  $self->_DumpRelation($outputDirectory, $relationshipName, $relationshipStructure->{Relations}->{$relationshipName});                  $self->_DumpRelation($outputDirectory, $relationshipName, $relationshipStructure->{Relations}->{$relationshipName});
1510          }          }
# Line 815  Line 1512 
1512    
1513  =head3 InsertObject  =head3 InsertObject
1514    
1515  C<< my $ok = $database->InsertObject($objectType, \%fieldHash); >>  C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >>
1516    
1517  Insert an object into the database. The object is defined by a type name and then a hash  Insert an object into the database. The object is defined by a type name and then a hash
1518  of field names to values. Field values in the primary relation are represented by scalars.  of field names to values. Field values in the primary relation are represented by scalars.
# Line 824  Line 1521 
1521  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases
1522  C<ZP_00210270.1> and C<gi|46206278>.  C<ZP_00210270.1> and C<gi|46206278>.
1523    
1524  C<< $database->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>  C<< $erdb->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>
1525    
1526  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and
1527  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.
1528    
1529  C<< $database->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>  C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>
1530    
1531  =over 4  =over 4
1532    
# Line 861  Line 1558 
1558          # Loop through the relations. We'll build insert statements for each one. If a relation is          # Loop through the relations. We'll build insert statements for each one. If a relation is
1559          # secondary, we may end up generating multiple insert statements. If an error occurs, we          # secondary, we may end up generating multiple insert statements. If an error occurs, we
1560          # stop the loop.          # stop the loop.
1561          while ($retVal && (my ($relationName, $relationDefinition) = each %{$relationTable})) {      my @relationList = keys %{$relationTable};
1562        for (my $i = 0; $retVal && $i <= $#relationList; $i++) {
1563            my $relationName = $relationList[$i];
1564            my $relationDefinition = $relationTable->{$relationName};
1565                  # Get the relation's fields. For each field we will collect a value in the corresponding                  # Get the relation's fields. For each field we will collect a value in the corresponding
1566                  # position of the @valueList array. If one of the fields is missing, we will add it to the                  # position of the @valueList array. If one of the fields is missing, we will add it to the
1567                  # @missing list.                  # @missing list.
# Line 951  Line 1651 
1651    
1652  =head3 LoadTable  =head3 LoadTable
1653    
1654  C<< my %results = $database->LoadTable($fileName, $relationName, $truncateFlag); >>  C<< my %results = $erdb->LoadTable($fileName, $relationName, $truncateFlag); >>
1655    
1656  Load data from a tab-delimited file into a specified table, optionally re-creating the table first.  Load data from a tab-delimited file into a specified table, optionally re-creating the table
1657    first.
1658    
1659  =over 4  =over 4
1660    
# Line 971  Line 1672 
1672    
1673  =item RETURN  =item RETURN
1674    
1675  Returns a statistical object containing the number of records read and a list of the error messages.  Returns a statistical object containing a list of the error messages.
1676    
1677  =back  =back
1678    
# Line 982  Line 1683 
1683          # Create the statistical return object.          # Create the statistical return object.
1684          my $retVal = _GetLoadStats();          my $retVal = _GetLoadStats();
1685          # Trace the fact of the load.          # Trace the fact of the load.
1686          Trace("Loading table $relationName from $fileName") if T(1);      Trace("Loading table $relationName from $fileName") if T(2);
1687          # Get the database handle.          # Get the database handle.
1688          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
1689        # Get the input file size.
1690        my $fileSize = -s $fileName;
1691          # Get the relation data.          # Get the relation data.
1692          my $relation = $self->_FindRelation($relationName);          my $relation = $self->_FindRelation($relationName);
1693          # Check the truncation flag.          # Check the truncation flag.
1694          if ($truncateFlag) {          if ($truncateFlag) {
1695                  Trace("Creating table $relationName") if T(1);          Trace("Creating table $relationName") if T(2);
1696            # Compute the row count estimate. We take the size of the load file,
1697            # divide it by the estimated row size, and then multiply by 1.5 to
1698            # leave extra room. We postulate a minimum row count of 1000 to
1699            # prevent problems with incoming empty load files.
1700            my $rowSize = $self->EstimateRowSize($relationName);
1701            my $estimate = FIG::max($fileSize * 1.5 / $rowSize, 1000);
1702                  # Re-create the table without its index.                  # Re-create the table without its index.
1703                  $self->CreateTable($relationName, 0);          $self->CreateTable($relationName, 0, $estimate);
1704            # If this is a pre-index DBMS, create the index here.
1705            if ($dbh->{_preIndex}) {
1706                eval {
1707                    $self->CreateIndex($relationName);
1708                };
1709                if ($@) {
1710                    $retVal->AddMessage($@);
1711                }
1712            }
1713          }          }
         # Determine whether or not this is a primary relation. Primary relations have an extra  
         # field indicating whether or not a given object is new or was loaded from the flat files.  
         my $primary = $self->_IsPrimary($relationName);  
         # Get the number of fields in this relation.  
         my @fieldList = @{$relation->{Fields}};  
         my $fieldCount = @fieldList;  
         # Record the number of expected fields.  
         my $expectedFields = $fieldCount + ($primary ? 1 : 0);  
         # Start a database transaction.  
         $dbh->begin_tran;  
         # Open the relation file. We need to create a cleaned-up copy before loading.  
         open TABLEIN, '<', $fileName;  
         my $tempName = "$fileName.tbl";  
         open TABLEOUT, '>', $tempName;  
         # Loop through the file.  
         while (<TABLEIN>) {  
                 # Chop off the new-line character.  
                 my $record = $_;  
                 chomp $record;  
         # Only proceed if the record is non-blank.  
         if ($record) {  
             # Escape all the backslashes found in the line.  
             $record =~ s/\\/\\\\/g;  
             # Eliminate any trailing tabs.  
             chop $record while substr($record, -1) eq "\t";  
             # If this is a primary relation, add a 0 for the new-record flag (indicating that  
             # this record is not new, but part of the original load).  
             if ($primary) {  
                 $record .= "\t0";  
             }  
             # Write the record.  
             print TABLEOUT "$record\n";  
             # Count the record read.  
             my $count = $retVal->Add('records');  
             my $len = length $record;  
             Trace("Record $count written with $len characters.") if T(4);  
         }  
         }  
         # Close the files.  
         close TABLEIN;  
         close TABLEOUT;  
     Trace("Temporary file $tempName created.") if T(4);  
1714      # Load the table.      # Load the table.
1715          my $rv;          my $rv;
1716          eval {          eval {
1717                  $rv = $dbh->load_table(file => $tempName, tbl => $relationName);          $rv = $dbh->load_table(file => $fileName, tbl => $relationName);
1718          };          };
1719          if (!defined $rv) {          if (!defined $rv) {
1720          $retVal->AddMessage($@) if ($@);          $retVal->AddMessage($@) if ($@);
1721          $retVal->AddMessage("Table load failed for $relationName using $tempName.");          $retVal->AddMessage("Table load failed for $relationName using $fileName.");
1722                  Trace("Table load failed for $relationName.") if T(1);                  Trace("Table load failed for $relationName.") if T(1);
1723          } else {          } else {
1724                  # Here we successfully loaded the table. Trace the number of records loaded.          # Here we successfully loaded the table.
1725                  Trace("$retVal->{records} records read for $relationName.") if T(1);          $retVal->Add("tables");
1726            my $size = -s $fileName;
1727            Trace("$size bytes loaded into $relationName.") if T(2);
1728                  # If we're rebuilding, we need to create the table indexes.                  # If we're rebuilding, we need to create the table indexes.
1729                  if ($truncateFlag) {          if ($truncateFlag && ! $dbh->{_preIndex}) {
1730                          eval {                          eval {
1731                                  $self->CreateIndex($relationName);                                  $self->CreateIndex($relationName);
1732                          };                          };
# Line 1057  Line 1735 
1735                          }                          }
1736                  }                  }
1737          }          }
1738          # Commit the database changes.      # Analyze the table to improve performance.
1739          $dbh->commit_tran;      $dbh->vacuum_it($relationName);
         # Delete the temporary file.  
         unlink $tempName;  
1740          # Return the statistics.          # Return the statistics.
1741          return $retVal;          return $retVal;
1742  }  }
1743    
1744  =head3 GenerateEntity  =head3 GenerateEntity
1745    
1746  C<< my $fieldHash = $database->GenerateEntity($id, $type, \%values); >>  C<< my $fieldHash = $erdb->GenerateEntity($id, $type, \%values); >>
1747    
1748  Generate the data for a new entity instance. This method creates a field hash suitable for  Generate the data for a new entity instance. This method creates a field hash suitable for
1749  passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest  passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest
# Line 1123  Line 1799 
1799          return $this;          return $this;
1800  }  }
1801    
1802    =head3 GetEntity
1803    
1804    C<< my $entityObject = $erdb->GetEntity($entityType, $ID); >>
1805    
1806    Return an object describing the entity instance with a specified ID.
1807    
1808    =over 4
1809    
1810    =item entityType
1811    
1812    Entity type name.
1813    
1814    =item ID
1815    
1816    ID of the desired entity.
1817    
1818    =item RETURN
1819    
1820    Returns a B<DBObject> representing the desired entity instance, or an undefined value if no
1821    instance is found with the specified key.
1822    
1823    =back
1824    
1825    =cut
1826    
1827    sub GetEntity {
1828        # Get the parameters.
1829        my ($self, $entityType, $ID) = @_;
1830        # Create a query.
1831        my $query = $self->Get([$entityType], "$entityType(id) = ?", $ID);
1832        # Get the first (and only) object.
1833        my $retVal = $query->Fetch();
1834        # Return the result.
1835        return $retVal;
1836    }
1837    
1838    =head3 GetEntityValues
1839    
1840    C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >>
1841    
1842    Return a list of values from a specified entity instance.
1843    
1844    =over 4
1845    
1846    =item entityType
1847    
1848    Entity type name.
1849    
1850    =item ID
1851    
1852    ID of the desired entity.
1853    
1854    =item fields
1855    
1856    List of field names, each of the form I<objectName>C<(>I<fieldName>C<)>.
1857    
1858    =item RETURN
1859    
1860    Returns a flattened list of the values of the specified fields for the specified entity.
1861    
1862    =back
1863    
1864    =cut
1865    
1866    sub GetEntityValues {
1867        # Get the parameters.
1868        my ($self, $entityType, $ID, $fields) = @_;
1869        # Get the specified entity.
1870        my $entity = $self->GetEntity($entityType, $ID);
1871        # Declare the return list.
1872        my @retVal = ();
1873        # If we found the entity, push the values into the return list.
1874        if ($entity) {
1875            push @retVal, $entity->Values($fields);
1876        }
1877        # Return the result.
1878        return @retVal;
1879    }
1880    
1881    =head3 GetAll
1882    
1883    C<< my @list = $erdb->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>
1884    
1885    Return a list of values taken from the objects returned by a query. The first three
1886    parameters correspond to the parameters of the L</Get> method. The final parameter is
1887    a list of the fields desired from each record found by the query. The field name
1888    syntax is the standard syntax used for fields in the B<ERDB> system--
1889    B<I<objectName>(I<fieldName>)>-- where I<objectName> is the name of the relevant entity
1890    or relationship and I<fieldName> is the name of the field.
1891    
1892    The list returned will be a list of lists. Each element of the list will contain
1893    the values returned for the fields specified in the fourth parameter. If one of the
1894    fields specified returns multiple values, they are flattened in with the rest. For
1895    example, the following call will return a list of the features in a particular
1896    spreadsheet cell, and each feature will be represented by a list containing the
1897    feature ID followed by all of its aliases.
1898    
1899    C<< $query = $erdb->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>
1900    
1901    =over 4
1902    
1903    =item objectNames
1904    
1905    List containing the names of the entity and relationship objects to be retrieved.
1906    
1907    =item filterClause
1908    
1909    WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can
1910    be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form
1911    B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the
1912    parameter list as additional parameters. The fields in a filter clause can come from primary
1913    entity relations, relationship relations, or secondary entity relations; however, all of the
1914    entities and relationships involved must be included in the list of object names.
1915    
1916    =item parameterList
1917    
1918    List of the parameters to be substituted in for the parameters marks in the filter clause.
1919    
1920    =item fields
1921    
1922    List of the fields to be returned in each element of the list returned.
1923    
1924    =item count
1925    
1926    Maximum number of records to return. If omitted or 0, all available records will be returned.
1927    
1928    =item RETURN
1929    
1930    Returns a list of list references. Each element of the return list contains the values for the
1931    fields specified in the B<fields> parameter.
1932    
1933    =back
1934    
1935    =cut
1936    #: Return Type @@;
1937    sub GetAll {
1938        # Get the parameters.
1939        my ($self, $objectNames, $filterClause, $parameterList, $fields, $count) = @_;
1940        # Translate the parameters from a list reference to a list. If the parameter
1941        # list is a scalar we convert it into a singleton list.
1942        my @parmList = ();
1943        if (ref $parameterList eq "ARRAY") {
1944            @parmList = @{$parameterList};
1945        } else {
1946            push @parmList, $parameterList;
1947        }
1948        # Insure the counter has a value.
1949        if (!defined $count) {
1950            $count = 0;
1951        }
1952        # Add the row limit to the filter clause.
1953        if ($count > 0) {
1954            $filterClause .= " LIMIT $count";
1955        }
1956        # Create the query.
1957        my $query = $self->Get($objectNames, $filterClause, @parmList);
1958        # Set up a counter of the number of records read.
1959        my $fetched = 0;
1960        # Loop through the records returned, extracting the fields. Note that if the
1961        # counter is non-zero, we stop when the number of records read hits the count.
1962        my @retVal = ();
1963        while (($count == 0 || $fetched < $count) && (my $row = $query->Fetch())) {
1964            my @rowData = $row->Values($fields);
1965            push @retVal, \@rowData;
1966            $fetched++;
1967        }
1968        # Return the resulting list.
1969        return @retVal;
1970    }
1971    
1972    =head3 EstimateRowSize
1973    
1974    C<< my $rowSize = $erdb->EstimateRowSize($relName); >>
1975    
1976    Estimate the row size of the specified relation. The estimated row size is computed by adding
1977    up the average length for each data type.
1978    
1979    =over 4
1980    
1981    =item relName
1982    
1983    Name of the relation whose estimated row size is desired.
1984    
1985    =item RETURN
1986    
1987    Returns an estimate of the row size for the specified relation.
1988    
1989    =back
1990    
1991    =cut
1992    #: Return Type $;
1993    sub EstimateRowSize {
1994        # Get the parameters.
1995        my ($self, $relName) = @_;
1996        # Declare the return variable.
1997        my $retVal = 0;
1998        # Find the relation descriptor.
1999        my $relation = $self->_FindRelation($relName);
2000        # Get the list of fields.
2001        for my $fieldData (@{$relation->{Fields}}) {
2002            # Get the field type and add its length.
2003            my $fieldLen = $TypeTable{$fieldData->{type}}->{avgLen};
2004            $retVal += $fieldLen;
2005        }
2006        # Return the result.
2007        return $retVal;
2008    }
2009    
2010    =head3 GetFieldTable
2011    
2012    C<< my $fieldHash = $self->GetFieldTable($objectnName); >>
2013    
2014    Get the field structure for a specified entity or relationship.
2015    
2016    =over 4
2017    
2018    =item objectName
2019    
2020    Name of the desired entity or relationship.
2021    
2022    =item RETURN
2023    
2024    The table containing the field descriptors for the specified object.
2025    
2026    =back
2027    
2028    =cut
2029    
2030    sub GetFieldTable {
2031        # Get the parameters.
2032        my ($self, $objectName) = @_;
2033        # Get the descriptor from the metadata.
2034        my $objectData = $self->_GetStructure($objectName);
2035        # Return the object's field table.
2036        return $objectData->{Fields};
2037    }
2038    
2039    =head3 GetUsefulCrossValues
2040    
2041    C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >>
2042    
2043    Return a list of the useful attributes that would be returned by a B<Cross> call
2044    from an entity of the source entity type through the specified relationship. This
2045    means it will return the fields of the target entity type and the intersection data
2046    fields in the relationship. Only primary table fields are returned. In other words,
2047    the field names returned will be for fields where there is always one and only one
2048    value.
2049    
2050    =over 4
2051    
2052    =item sourceEntity
2053    
2054    Name of the entity from which the relationship crossing will start.
2055    
2056    =item relationship
2057    
2058    Name of the relationship being crossed.
2059    
2060    =item RETURN
2061    
2062    Returns a list of field names in Sprout field format (I<objectName>C<(>I<fieldName>C<)>.
2063    
2064    =back
2065    
2066    =cut
2067    #: Return Type @;
2068    sub GetUsefulCrossValues {
2069        # Get the parameters.
2070        my ($self, $sourceEntity, $relationship) = @_;
2071        # Declare the return variable.
2072        my @retVal = ();
2073        # Determine the target entity for the relationship. This is whichever entity is not
2074        # the source entity. So, if the source entity is the FROM, we'll get the name of
2075        # the TO, and vice versa.
2076        my $relStructure = $self->_GetStructure($relationship);
2077        my $targetEntityType = ($relStructure->{from} eq $sourceEntity ? "to" : "from");
2078        my $targetEntity = $relStructure->{$targetEntityType};
2079        # Get the field table for the entity.
2080        my $entityFields = $self->GetFieldTable($targetEntity);
2081        # The field table is a hash. The hash key is the field name. The hash value is a structure.
2082        # For the entity fields, the key aspect of the target structure is that the {relation} value
2083        # must match the entity name.
2084        my @fieldList = map { "$targetEntity($_)" } grep { $entityFields->{$_}->{relation} eq $targetEntity }
2085                            keys %{$entityFields};
2086        # Push the fields found onto the return variable.
2087        push @retVal, sort @fieldList;
2088        # Get the field table for the relationship.
2089        my $relationshipFields = $self->GetFieldTable($relationship);
2090        # Here we have a different rule. We want all the fields other than "from-link" and "to-link".
2091        # This may end up being an empty set.
2092        my @fieldList2 = map { "$relationship($_)" } grep { $_ ne "from-link" && $_ ne "to-link" }
2093                            keys %{$relationshipFields};
2094        # Push these onto the return list.
2095        push @retVal, sort @fieldList2;
2096        # Return the result.
2097        return @retVal;
2098    }
2099    
2100  =head2 Internal Utility Methods  =head2 Internal Utility Methods
2101    
# Line 1135  Line 2108 
2108  =cut  =cut
2109    
2110  sub _GetLoadStats {  sub _GetLoadStats {
2111          return Stats->new('records');      return Stats->new();
2112  }  }
2113    
2114  =head3 GenerateFields  =head3 GenerateFields
# Line 1330  Line 2303 
2303          return $objectData->{Relations};          return $objectData->{Relations};
2304  }  }
2305    
 =head3 GetFieldTable  
   
 Get the field structure for a specified entity or relationship.  
   
 This is an instance method.  
   
 =over 4  
   
 =item objectName  
   
 Name of the desired entity or relationship.  
   
 =item RETURN  
   
 The table containing the field descriptors for the specified object.  
   
 =back  
   
 =cut  
   
 sub _GetFieldTable {  
         # Get the parameters.  
         my ($self, $objectName) = @_;  
         # Get the descriptor from the metadata.  
         my $objectData = $self->_GetStructure($objectName);  
         # Return the object's field table.  
         return $objectData->{Fields};  
 }  
   
2306  =head3 ValidateFieldNames  =head3 ValidateFieldNames
2307    
2308  Determine whether or not the field names are valid. A description of the problems with the names  Determine whether or not the field names are valid. A description of the problems with the names
# Line 1499  Line 2443 
2443  sub _LoadMetaData {  sub _LoadMetaData {
2444          # Get the parameters.          # Get the parameters.
2445          my ($filename) = @_;          my ($filename) = @_;
2446        Trace("Reading Sprout DBD from $filename.") if T(2);
2447          # Slurp the XML file into a variable. Extensive use of options is used to insure we          # Slurp the XML file into a variable. Extensive use of options is used to insure we
2448          # get the exact structure we want.          # get the exact structure we want.
2449          my $metadata = XML::Simple::XMLin($filename,          my $metadata = XML::Simple::XMLin($filename,
# Line 1523  Line 2468 
2468          my %masterRelationTable = ();          my %masterRelationTable = ();
2469          # Loop through the entities.          # Loop through the entities.
2470          my $entityList = $metadata->{Entities};          my $entityList = $metadata->{Entities};
2471          while (my ($entityName, $entityStructure) = each %{$entityList}) {      for my $entityName (keys %{$entityList}) {
2472            my $entityStructure = $entityList->{$entityName};
2473                  #                  #
2474                  # The first step is to run creating all the entity's default values. For C<Field> elements,          # The first step is to create all the entity's default values. For C<Field> elements,
2475                  # the relation name must be added where it is not specified. For relationships,                  # the relation name must be added where it is not specified. For relationships,
2476                  # the B<from-link> and B<to-link> fields must be inserted, and for entities an B<id>                  # the B<from-link> and B<to-link> fields must be inserted, and for entities an B<id>
2477                  # field must be added to each relation. Finally, each field will have a C<PrettySort> attribute                  # field must be added to each relation. Finally, each field will have a C<PrettySort> attribute
# Line 1571  Line 2517 
2517                  # to a list of fields. First, we need the ID field itself.                  # to a list of fields. First, we need the ID field itself.
2518                  my $idField = $fieldList->{id};                  my $idField = $fieldList->{id};
2519                  # Loop through the relations.                  # Loop through the relations.
2520                  while (my ($relationName, $relation) = each %{$relationTable}) {          for my $relationName (keys %{$relationTable}) {
2521                my $relation = $relationTable->{$relationName};
2522                          # Get the relation's field list.                          # Get the relation's field list.
2523                          my $relationFieldList = $relation->{Fields};                          my $relationFieldList = $relation->{Fields};
2524                          # Add the ID field to it. If the field's already there, it will not make any                          # Add the ID field to it. If the field's already there, it will not make any
# Line 1621  Line 2568 
2568                  # The next step is to insure that each relation has at least one index that begins with the ID field.                  # The next step is to insure that each relation has at least one index that begins with the ID field.
2569                  # After that, we convert each relation's index list to an index table. We first need to loop through                  # After that, we convert each relation's index list to an index table. We first need to loop through
2570                  # the relations.                  # the relations.
2571                  while (my ($relationName, $relation) = each %{$relationTable}) {          for my $relationName (keys %{$relationTable}) {
2572                my $relation = $relationTable->{$relationName};
2573                          # Get the relation's index list.                          # Get the relation's index list.
2574                          my $indexList = $relation->{Indexes};                          my $indexList = $relation->{Indexes};
2575                          # Insure this relation has an ID index.                          # Insure this relation has an ID index.
# Line 1652  Line 2600 
2600          # Loop through the relationships. Relationships actually turn out to be much simpler than entities.          # Loop through the relationships. Relationships actually turn out to be much simpler than entities.
2601          # For one thing, there is only a single constituent relation.          # For one thing, there is only a single constituent relation.
2602          my $relationshipList = $metadata->{Relationships};          my $relationshipList = $metadata->{Relationships};
2603          while (my ($relationshipName, $relationshipStructure) = each %{$relationshipList}) {      for my $relationshipName (keys %{$relationshipList}) {
2604            my $relationshipStructure = $relationshipList->{$relationshipName};
2605                  # Fix up this relationship.                  # Fix up this relationship.
2606                  _FixupFields($relationshipStructure, $relationshipName, 2, 3);                  _FixupFields($relationshipStructure, $relationshipName, 2, 3);
2607                  # Format a description for the FROM field.                  # Format a description for the FROM field.
# Line 1701  Line 2650 
2650                  my @fromList = ();                  my @fromList = ();
2651                  my @toList = ();                  my @toList = ();
2652                  my @bothList = ();                  my @bothList = ();
2653                  while (my ($relationshipName, $relationship) = each %{$relationshipList}) {          Trace("Join table build for $entityName.") if T(metadata => 4);
2654            for my $relationshipName (keys %{$relationshipList}) {
2655                my $relationship = $relationshipList->{$relationshipName};
2656                          # Determine if this relationship has our entity in one of its link fields.                          # Determine if this relationship has our entity in one of its link fields.
2657                          if ($relationship->{from} eq $entityName) {              my $fromEntity = $relationship->{from};
2658                                  if ($relationship->{to} eq $entityName) {              my $toEntity = $relationship->{to};
2659                Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(Joins => 4);
2660                if ($fromEntity eq $entityName) {
2661                    if ($toEntity eq $entityName) {
2662                                          # Here the relationship is recursive.                                          # Here the relationship is recursive.
2663                                          push @bothList, $relationshipName;                                          push @bothList, $relationshipName;
2664                        Trace("Relationship $relationshipName put in both-list.") if T(metadata => 4);
2665                                  } else {                                  } else {
2666                                          # Here the relationship comes from the entity.                                          # Here the relationship comes from the entity.
2667                                          push @fromList, $relationshipName;                                          push @fromList, $relationshipName;
2668                        Trace("Relationship $relationshipName put in from-list.") if T(metadata => 4);
2669                                  }                                  }
2670                          } elsif ($relationship->{to} eq $entityName) {              } elsif ($toEntity eq $entityName) {
2671                                  # Here the relationship goes to the entity.                                  # Here the relationship goes to the entity.
2672                                  push @toList, $relationshipName;                                  push @toList, $relationshipName;
2673                    Trace("Relationship $relationshipName put in to-list.") if T(metadata => 4);
2674                          }                          }
2675                  }                  }
2676                  # Create the nonrecursive joins. Note that we build two hashes for running                  # Create the nonrecursive joins. Note that we build two hashes for running
# Line 1722  Line 2679 
2679                  # hash table at the same time.                  # hash table at the same time.
2680                  my %directRelationships = ( from => \@fromList, to => \@toList );                  my %directRelationships = ( from => \@fromList, to => \@toList );
2681                  my %otherRelationships = ( from => \@fromList, to => \@toList );                  my %otherRelationships = ( from => \@fromList, to => \@toList );
2682                  while (my ($linkType, $relationships) = each %directRelationships) {          for my $linkType (keys %directRelationships) {
2683                my $relationships = $directRelationships{$linkType};
2684                          # Loop through all the relationships.                          # Loop through all the relationships.
2685                          for my $relationshipName (@{$relationships}) {                          for my $relationshipName (@{$relationships}) {
2686                                  # Create joins between the entity and this relationship.                                  # Create joins between the entity and this relationship.
2687                                  my $linkField = "$relationshipName.${linkType}_link";                                  my $linkField = "$relationshipName.${linkType}_link";
2688                                  my $joinClause = "$entityName.id = $linkField";                                  my $joinClause = "$entityName.id = $linkField";
2689                    Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(metadata => 4);
2690                                  $joinTable{"$entityName/$relationshipName"} = $joinClause;                                  $joinTable{"$entityName/$relationshipName"} = $joinClause;
2691                                  $joinTable{"$relationshipName/$entityName"} = $joinClause;                                  $joinTable{"$relationshipName/$entityName"} = $joinClause;
2692                                  # Create joins between this relationship and the other relationships.                                  # Create joins between this relationship and the other relationships.
2693                                  while (my ($otherType, $otherships) = each %otherRelationships) {                  for my $otherType (keys %otherRelationships) {
2694                        my $otherships = $otherRelationships{$otherType};
2695                                          for my $otherName (@{$otherships}) {                                          for my $otherName (@{$otherships}) {
2696                                                  # Get the key for this join.                                                  # Get the key for this join.
2697                                                  my $joinKey = "$otherName/$relationshipName";                                                  my $joinKey = "$otherName/$relationshipName";
# Line 1741  Line 2701 
2701                                                          # path is ambiguous. We delete the join from the join                                                          # path is ambiguous. We delete the join from the join
2702                                                          # table to prevent it from being used.                                                          # table to prevent it from being used.
2703                                                          delete $joinTable{$joinKey};                                                          delete $joinTable{$joinKey};
2704                                Trace("Deleting ambiguous join $joinKey.") if T(4);
2705                                                  } elsif ($otherName ne $relationshipName) {                                                  } elsif ($otherName ne $relationshipName) {
2706                                                          # Here we have a valid join. Note that joins between a                                                          # Here we have a valid join. Note that joins between a
2707                                                          # relationship and itself are prohibited.                                                          # relationship and itself are prohibited.
2708                                                          $joinTable{$joinKey} = "$otherName.${otherType}_link = $linkField";                              my $relJoinClause = "$otherName.${otherType}_link = $linkField";
2709                                $joinTable{$joinKey} = $relJoinClause;
2710                                Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(metadata => 4);
2711                                                  }                                                  }
2712                                          }                                          }
2713                                  }                                  }
# Line 1753  Line 2716 
2716                                  # relationship can only be ambiguous with another recursive relationship,                                  # relationship can only be ambiguous with another recursive relationship,
2717                                  # and the incoming relationship from the outer loop is never recursive.                                  # and the incoming relationship from the outer loop is never recursive.
2718                                  for my $otherName (@bothList) {                                  for my $otherName (@bothList) {
2719                        Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(metadata => 4);
2720                                          # Join from the left.                                          # Join from the left.
2721                                          $joinTable{"$relationshipName/$otherName"} =                                          $joinTable{"$relationshipName/$otherName"} =
2722                                                  "$linkField = $otherName.from_link";                                                  "$linkField = $otherName.from_link";
# Line 1767  Line 2731 
2731                  # rise to situations where we can't create the path we want; however, it is always                  # rise to situations where we can't create the path we want; however, it is always
2732                  # possible to get the same effect using multiple queries.                  # possible to get the same effect using multiple queries.
2733                  for my $relationshipName (@bothList) {                  for my $relationshipName (@bothList) {
2734                Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(metadata => 4);
2735                          # Join to the entity from each direction.                          # Join to the entity from each direction.
2736                          $joinTable{"$entityName/$relationshipName"} =                          $joinTable{"$entityName/$relationshipName"} =
2737                                  "$entityName.id = $relationshipName.from_link";                                  "$entityName.id = $relationshipName.from_link";
# Line 1780  Line 2745 
2745          return $metadata;          return $metadata;
2746  }  }
2747    
2748    =head3 SortNeeded
2749    
2750    C<< my $flag = $erdb->SortNeeded($relationName); >>
2751    
2752    Return TRUE if the specified relation should be sorted during loading to remove duplicate keys,
2753    else FALSE.
2754    
2755    =over 4
2756    
2757    =item relationName
2758    
2759    Name of the relation to be examined.
2760    
2761    =item RETURN
2762    
2763    Returns TRUE if the relation needs a sort, else FALSE.
2764    
2765    =back
2766    
2767    =cut
2768    #: Return Type $;
2769    sub SortNeeded {
2770        # Get the parameters.
2771        my ($self, $relationName) = @_;
2772        # Declare the return variable.
2773        my $retVal = 0;
2774        # Find out if the relation is a primary entity relation.
2775        my $entityTable = $self->{_metaData}->{Entities};
2776        if (exists $entityTable->{$relationName}) {
2777            my $keyType = $entityTable->{$relationName}->{keyType};
2778            Trace("Relation $relationName found in entity table with key type $keyType.") if T(3);
2779            # If the key is not a hash string, we must do the sort.
2780            if ($keyType ne 'hash-string') {
2781                $retVal = 1;
2782            }
2783        }
2784        # Return the result.
2785        return $retVal;
2786    }
2787    
2788  =head3 CreateRelationshipIndex  =head3 CreateRelationshipIndex
2789    
2790  Create an index for a relationship's relation.  Create an index for a relationship's relation.
# Line 1817  Line 2822 
2822          # index descriptor does not exist, it will be created automatically so we can add          # index descriptor does not exist, it will be created automatically so we can add
2823          # the field to it.          # the field to it.
2824          unshift @{$newIndex->{IndexFields}}, $firstField;          unshift @{$newIndex->{IndexFields}}, $firstField;
2825        # If this is a one-to-many relationship, the "To" index is unique.
2826        if ($relationshipStructure->{arity} eq "1M" && $indexKey eq "To") {
2827            $newIndex->{Unique} = 'true';
2828        }
2829          # Add the index to the relation.          # Add the index to the relation.
2830          _AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex);          _AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex);
2831  }  }
# Line 1906  Line 2915 
2915                  $structure->{Fields} = { };                  $structure->{Fields} = { };
2916          } else {          } else {
2917                  # Here we have a field list. Loop through its fields.                  # Here we have a field list. Loop through its fields.
2918                  while (my ($fieldName, $fieldData) = each %{$structure->{Fields}}) {          my $fieldStructures = $structure->{Fields};
2919            for my $fieldName (keys %{$fieldStructures}) {
2920                Trace("Processing field $fieldName of $defaultRelationName.") if T(4);
2921                my $fieldData = $fieldStructures->{$fieldName};
2922                          # Get the field type.                          # Get the field type.
2923                          my $type = $fieldData->{type};                          my $type = $fieldData->{type};
2924                          # Plug in a relation name if it is needed.                          # Plug in a relation name if it is needed.

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.44

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3