[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.6, Wed May 4 03:24:43 2005 UTC revision 1.42, Wed Apr 19 03:34:15 2006 UTC
# Line 2  Line 2 
2    
3          use strict;          use strict;
4          use Tracer;          use Tracer;
5          use DBKernel;      use DBrtns;
6          use Data::Dumper;          use Data::Dumper;
7          use XML::Simple;          use XML::Simple;
8          use DBQuery;          use DBQuery;
9          use DBObject;          use DBObject;
10          use Stats;          use Stats;
11          use Time::HiRes qw(gettimeofday);          use Time::HiRes qw(gettimeofday);
12        use Digest::MD5 qw(md5_base64);
13        use FIG;
14    
15  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
16    
# Line 32  Line 34 
34  relation that contains two fields-- the feature ID (C<id>) and the alias name (C<alias>).  relation that contains two fields-- the feature ID (C<id>) and the alias name (C<alias>).
35  The B<FEATURE> entity also contains an optional virulence number. This is implemented  The B<FEATURE> entity also contains an optional virulence number. This is implemented
36  as a separate relation C<FeatureVirulence> which contains an ID (C<id>) and a virulence number  as a separate relation C<FeatureVirulence> which contains an ID (C<id>) and a virulence number
37  (C<virulence>). If the virulence of a feature I<ABC> is known to be 6, there will be one row in the  (C<virulence>). If the virulence of a feature I<ABC> is known to be 6, there will be one row in
38  C<FeatureVirulence> relation possessing the value I<ABC> as its ID and 6 as its virulence number.  the C<FeatureVirulence> relation possessing the value I<ABC> as its ID and 6 as its virulence
39  If the virulence of I<ABC> is not known, there will not be any rows for it in C<FeatureVirulence>.  number. If the virulence of I<ABC> is not known, there will not be any rows for it in
40    C<FeatureVirulence>.
41    
42  Entities are connected by binary relationships implemented using single relations possessing the  Entities are connected by binary relationships implemented using single relations possessing the
43  same name as the relationship itself and that has an I<arity> of 1-to-1 (C<11>), 1-to-many (C<1M>),  same name as the relationship itself and that has an I<arity> of 1-to-1 (C<11>), 1-to-many (C<1M>),
# Line 69  Line 72 
72  is described in the L</GenerateEntity> and L</GenerateConnection> methods, though it is not yet  is described in the L</GenerateEntity> and L</GenerateConnection> methods, though it is not yet
73  fully implemented.  fully implemented.
74    
75    =head2 XML Database Description
76    
77    =head3 Data Types
78    
79    The ERDB system supports the following data types. Note that there are numerous string
80    types depending on the maximum length. Some database packages limit the total number of
81    characters you have in an index key; to insure the database works in all environments,
82    the type of string should be the shortest one possible that supports all the known values.
83    
84    =over 4
85    
86    =item char
87    
88    single ASCII character
89    
90    =item int
91    
92    32-bit signed integer
93    
94    =item date
95    
96    64-bit unsigned integer, representing a PERL date/time value
97    
98    =item text
99    
100    long string; Text fields cannot be used in indexes or sorting and do not support the
101    normal syntax of filter clauses, but can be up to a billion character in length
102    
103    =item float
104    
105    double-precision floating-point number
106    
107    =item boolean
108    
109    single-bit numeric value; The value is stored as a 16-bit signed integer (for
110    compatability with certain database packages), but the only values supported are
111    0 and 1.
112    
113    =item key-string
114    
115    variable-length string, maximum 40 characters
116    
117    =item name-string
118    
119    variable-length string, maximum 80 characters
120    
121    =item medium-string
122    
123    variable-length string, maximum 160 characters
124    
125    =item string
126    
127    variable-length string, maximum 255 characters
128    
129    =item hash-string
130    
131    variable-length string, maximum 22 characters
132    
133    =back
134    
135    The hash-string data type has a special meaning. The actual key passed into the loader will
136    be a string, but it will be digested into a 22-character MD5 code to save space. Although the
137    MD5 algorithm is not perfect, it is extremely unlikely two strings will have the same
138    digest. Therefore, it is presumed the keys will be unique. When the database is actually
139    in use, the hashed keys will be presented rather than the original values. For this reason,
140    they should not be used for entities where the key is meaningful.
141    
142    =head3 Global Tags
143    
144    The entire database definition must be inside a B<Database> tag. The display name of
145    the database is given by the text associated with the B<Title> tag. The display name
146    is only used in the automated documentation. It has no other effect. The entities and
147    relationships are listed inside the B<Entities> and B<Relationships> tags,
148    respectively. None of these tags have attributes.
149    
150        <Database>
151            <Title>... display title here...</Title>
152            <Entities>
153                ... entity definitions here ...
154            </Entities>
155            <Relationships>
156                ... relationship definitions here...
157            </Relationships>
158        </Database>
159    
160    Entities, relationships, indexes, and fields all allow a text tag called B<Notes>.
161    The text inside the B<Notes> tag contains comments that will appear when the database
162    documentation is generated. Within a B<Notes> tag, you may use C<[i]> and C<[/i]> for
163    italics, C<[b]> and C<[/b]> for bold, and C<[p]> for a new paragraph.
164    
165    =head3 Fields
166    
167    Both entities and relationships have fields described by B<Field> tags. A B<Field>
168    tag can have B<Notes> associated with it. The complete set of B<Field> tags for an
169    object mus be inside B<Fields> tags.
170    
171        <Entity ... >
172            <Fields>
173                ... Field tags ...
174            </Fields>
175        </Entity>
176    
177    The attributes for the B<Field> tag are as follows.
178    
179    =over 4
180    
181    =item name
182    
183    Name of the field. The field name should contain only letters, digits, and hyphens (C<->),
184    and the first character should be a letter. Most underlying databases are case-insensitive
185    with the respect to field names, so a best practice is to use lower-case letters only.
186    
187    =item type
188    
189    Data type of the field. The legal data types are given above.
190    
191    =item relation
192    
193    Name of the relation containing the field. This should only be specified for entity
194    fields. The ERDB system does not support optional fields or multi-occurring fields
195    in the primary relation of an entity. Instead, they are put into secondary relations.
196    So, for example, in the C<Genome> entity, the C<group-name> field indicates a special
197    grouping used to select a subset of the genomes. A given genome may not be in any
198    groups or may be in multiple groups. Therefore, C<group-name> specifies a relation
199    value. The relation name specified must be a valid table name. By convention, it is
200    usually the entity name followed by a qualifying word (e.g. C<GenomeGroup>). In an
201    entity, the fields without a relation attribute are said to belong to the
202    I<primary relation>. This relation has the same name as the entity itself.
203    
204    =back
205    
206    =head3 Indexes
207    
208    An entity can have multiple alternate indexes associated with it. The fields must
209    be from the primary relation. The alternate indexes assist in ordering results
210    from a query. A relationship can have up to two indexes-- a I<to-index> and a
211    I<from-index>. These order the results when crossing the relationship. For
212    example, in the relationship C<HasContig> from C<Genome> to C<Contig>, the
213    from-index would order the contigs of a ganome, and the to-index would order
214    the genomes of a contig. A relationship's index must specify only fields in
215    the relationship.
216    
217    The indexes for an entity must be listed inside the B<Indexes> tag. The from-index
218    of a relationship is specified using the B<FromIndex> tag; the to-index is specified
219    using the B<ToIndex> tag.
220    
221    Each index can contain a B<Notes> tag. In addition, it will have an B<IndexFields>
222    tag containing the B<IndexField> tags. These specify, in order, the fields used in
223    the index. The attributes of an B<IndexField> tag are as follows.
224    
225    =over 4
226    
227    =item name
228    
229    Name of the field.
230    
231    =item order
232    
233    Sort order of the field-- C<ascending> or C<descending>.
234    
235    =back
236    
237    The B<Index>, B<FromIndex>, and B<ToIndex> tags themselves have no attributes.
238    
239    =head3 Object and Field Names
240    
241    By convention entity and relationship names use capital casing (e.g. C<Genome> or
242    C<HasRegionsIn>. Most underlying databases, however, are aggressively case-insensitive
243    with respect to relation names, converting them internally to all-upper case or
244    all-lower case.
245    
246    If syntax or parsing errors occur when you try to load or use an ERDB database, the
247    most likely reason is that one of your objects has an SQL reserved word as its name.
248    The list of SQL reserved words keeps increasing; however, most are unlikely to show
249    up as a noun or declarative verb phrase. The exceptions are C<Group>, C<User>,
250    C<Table>, C<Index>, C<Object>, C<Date>, C<Number>, C<Update>, C<Time>, C<Percent>,
251    C<Memo>, C<Order>, and C<Sum>. This problem can crop up in field names as well.
252    
253    Every entity has a field called C<id> that acts as its primary key. Every relationship
254    has fields called C<from-link> and C<to-link> that contain copies of the relevant
255    entity IDs. These are essentially ERDB's reserved words, and should not be used
256    for user-defined field names.
257    
258    =head3 Entities
259    
260    An entity is described by the B<Entity> tag. The entity can contain B<Notes>, an
261    B<Indexes> tag containing one or more secondary indexes, and a B<Fields> tag
262    containing one or more fields. The attributes of the B<Entity> tag are as follows.
263    
264    =over 4
265    
266    =item name
267    
268    Name of the entity. The entity name, by convention, uses capital casing (e.g. C<Genome>
269    or C<GroupBlock>) and should be a noun or noun phrase.
270    
271    =item keyType
272    
273    Data type of the primary key. The primary key is always named C<id>.
274    
275    =back
276    
277    =head3 Relationships
278    
279    A relationship is described by the C<Relationship> tag. Within a relationship,
280    there can be a C<Notes> tag, a C<Fields> tag containing the intersection data
281    fields, a C<FromIndex> tag containing the from-index, and a C<ToIndex> tag containing
282    the to-index.
283    
284    The C<Relationship> tag has the following attributes.
285    
286    =over 4
287    
288    =item name
289    
290    Name of the relationship. The relationship name, by convention, uses capital casing
291    (e.g. C<ContainsRegionIn> or C<HasContig>), and should be a declarative verb
292    phrase, designed to fit between the from-entity and the to-entity (e.g.
293    Block C<ContainsRegionIn> Genome).
294    
295    =item from
296    
297    Name of the entity from which the relationship starts.
298    
299    =item to
300    
301    Name of the entity to which the relationship proceeds.
302    
303    =item arity
304    
305    Relationship type: C<1M> for one-to-many and C<MM> for many-to-many.
306    
307    =back
308    
309  =cut  =cut
310    
311  # GLOBALS  # GLOBALS
# Line 76  Line 313 
313  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.
314  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field
315  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation
316   #string is specified in the field definition.  # string is specified in the field definition. "avgLen" is the average byte length for estimating
317  my %TypeTable = ( char =>        { sqlType => 'CHAR(1)',                        maxLen => 1,                    dataGen => "StringGen('A')" },  # record sizes.
318                                    int =>         { sqlType => 'INTEGER',                        maxLen => 20,                   dataGen => "IntGen(0, 99999999)" },  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            avgLen =>   1, dataGen => "StringGen('A')" },
319                                    string =>  { sqlType => 'VARCHAR(255)',               maxLen => 255,                  dataGen => "StringGen(IntGen(10,250))" },                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           avgLen =>   4, dataGen => "IntGen(0, 99999999)" },
320                                    text =>        { sqlType => 'TEXT',                           maxLen => 1000000000,   dataGen => "StringGen(IntGen(80,1000))" },                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          avgLen => 100, dataGen => "StringGen(IntGen(10,250))" },
321                                    date =>        { sqlType => 'BIGINT',                         maxLen => 80,                   dataGen => "DateGen(-7, 7, IntGen(0,1400))" },                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" },
322                                    float =>       { sqlType => 'DOUBLE PRECISION',       maxLen => 40,                   dataGen => "FloatGen(0.0, 100.0)" },                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" },
323                                    boolean => { sqlType => 'SMALLINT',                   maxLen => 1,                    dataGen => "IntGen(0, 1)" },                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, dataGen => "FloatGen(0.0, 100.0)" },
324                      boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   1, dataGen => "IntGen(0, 1)" },
325                     'hash-string' =>
326                                 { sqlType => 'VARCHAR(22)',        maxLen => 22,           avgLen =>  22, dataGen => "SringGen(22)" },
327                               'key-string' =>                               'key-string' =>
328                                                           { sqlType => 'VARCHAR(40)',            maxLen => 40,                   dataGen => "StringGen(IntGen(10,40))" },                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, dataGen => "StringGen(IntGen(10,40))" },
329                                   'name-string' =>                                   'name-string' =>
330                                                           { sqlType => 'VARCHAR(80)',            maxLen => 80,                   dataGen => "StringGen(IntGen(10,80))" },                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           avgLen =>  40, dataGen => "StringGen(IntGen(10,80))" },
331                                   'medium-string' =>                                   'medium-string' =>
332                                                           { sqlType => 'VARCHAR(160)',           maxLen => 160,                  dataGen => "StringGen(IntGen(10,160))" },                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          avgLen =>  40, dataGen => "StringGen(IntGen(10,160))" },
333                                  );                                  );
334    
335  # Table translating arities into natural language.  # Table translating arities into natural language.
# Line 145  Line 385 
385    
386  =head3 ShowMetaData  =head3 ShowMetaData
387    
388  C<< $database->ShowMetaData($fileName); >>  C<< $erdb->ShowMetaData($fileName); >>
389    
390  This method outputs a description of the database. This description can be used to help users create  This method outputs a description of the database. This description can be used to help users create
391  the data to be loaded into the relations.  the data to be loaded into the relations.
# Line 282  Line 522 
522                  # Separate out the source, the target, and the join clause.                  # Separate out the source, the target, and the join clause.
523                  $joinKey =~ m!^([^/]+)/(.+)$!;                  $joinKey =~ m!^([^/]+)/(.+)$!;
524                  my ($sourceRelation, $targetRelation) = ($1, $2);                  my ($sourceRelation, $targetRelation) = ($1, $2);
525                  Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(4);          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(Joins => 4);
526                  my $source = $self->ComputeObjectSentence($sourceRelation);                  my $source = $self->ComputeObjectSentence($sourceRelation);
527                  my $target = $self->ComputeObjectSentence($targetRelation);                  my $target = $self->ComputeObjectSentence($targetRelation);
528                  my $clause = $joinTable->{$joinKey};                  my $clause = $joinTable->{$joinKey};
# Line 300  Line 540 
540    
541  =head3 DumpMetaData  =head3 DumpMetaData
542    
543  C<< $database->DumpMetaData(); >>  C<< $erdb->DumpMetaData(); >>
544    
545  Return a dump of the metadata structure.  Return a dump of the metadata structure.
546    
# Line 315  Line 555 
555    
556  =head3 CreateTables  =head3 CreateTables
557    
558  C<< $datanase->CreateTables(); >>  C<< $erdb->CreateTables(); >>
559    
560  This method creates the tables for the database from the metadata structure loaded by the  This method creates the tables for the database from the metadata structure loaded by the
561  constructor. It is expected this function will only be used on rare occasions, when the  constructor. It is expected this function will only be used on rare occasions, when the
# Line 327  Line 567 
567  sub CreateTables {  sub CreateTables {
568          # Get the parameters.          # Get the parameters.
569          my ($self) = @_;          my ($self) = @_;
570          my $metadata = $self->{_metaData};      # Get the relation names.
571          my $dbh = $self->{_dbh};      my @relNames = $self->GetTableNames();
572          # Loop through the entities.      # Loop through the relations.
573          my $entityHash = $metadata->{Entities};      for my $relationName (@relNames) {
         for my $entityName (keys %{$entityHash}) {  
                 my $entityData = $entityHash->{$entityName};  
                 # Tell the user what we're doing.  
                 Trace("Creating relations for entity $entityName.") if T(1);  
                 # Loop through the entity's relations.  
                 for my $relationName (keys %{$entityData->{Relations}}) {  
574                          # Create a table for this relation.                          # Create a table for this relation.
575                          $self->CreateTable($relationName);                          $self->CreateTable($relationName);
576                          Trace("Relation $relationName created.") if T(1);          Trace("Relation $relationName created.") if T(2);
                 }  
         }  
         # Loop through the relationships.  
         my $relationshipTable = $metadata->{Relationships};  
         for my $relationshipName (keys %{$metadata->{Relationships}}) {  
                 # Create a table for this relationship.  
                 Trace("Creating relationship $relationshipName.") if T(1);  
                 $self->CreateTable($relationshipName);  
577          }          }
578  }  }
579    
580  =head3 CreateTable  =head3 CreateTable
581    
582  C<< $database->CreateTable($tableName, $indexFlag); >>  C<< $erdb->CreateTable($tableName, $indexFlag, $estimatedRows); >>
583    
584  Create the table for a relation and optionally create its indexes.  Create the table for a relation and optionally create its indexes.
585    
# Line 363  Line 589 
589    
590  Name of the relation (which will also be the table name).  Name of the relation (which will also be the table name).
591    
592  =item $indexFlag  =item indexFlag
593    
594  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,
595  L</CreateIndexes> must be called later to bring the indexes into existence.  L</CreateIndexes> must be called later to bring the indexes into existence.
596    
597    =item estimatedRows (optional)
598    
599    If specified, the estimated maximum number of rows for the relation. This
600    information allows the creation of tables using storage engines that are
601    faster but require size estimates, such as MyISAM.
602    
603  =back  =back
604    
605  =cut  =cut
606    
607  sub CreateTable {  sub CreateTable {
608          # Get the parameters.          # Get the parameters.
609          my ($self, $relationName, $indexFlag) = @_;      my ($self, $relationName, $indexFlag, $estimatedRows) = @_;
610          # Get the database handle.          # Get the database handle.
611          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
612          # Get the relation data and determine whether or not the relation is primary.          # Get the relation data and determine whether or not the relation is primary.
# Line 398  Line 630 
630          # Insure the table is not already there.          # Insure the table is not already there.
631          $dbh->drop_table(tbl => $relationName);          $dbh->drop_table(tbl => $relationName);
632          Trace("Table $relationName dropped.") if T(2);          Trace("Table $relationName dropped.") if T(2);
633        # If there are estimated rows, create an estimate so we can take advantage of
634        # faster DB technologies.
635        my $estimation = undef;
636        if ($estimatedRows) {
637            $estimation = [$self->EstimateRowSize($relationName), $estimatedRows];
638        }
639          # Create the table.          # Create the table.
640          Trace("Creating table $relationName: $fieldThing") if T(2);          Trace("Creating table $relationName: $fieldThing") if T(2);
641          $dbh->create_table(tbl => $relationName, flds => $fieldThing);      $dbh->create_table(tbl => $relationName, flds => $fieldThing, estimates => $estimation);
642          Trace("Relation $relationName created in database.") if T(2);          Trace("Relation $relationName created in database.") if T(2);
643          # If we want to build the indexes, we do it here.          # If we want to build the indexes, we do it here.
644          if ($indexFlag) {          if ($indexFlag) {
# Line 408  Line 646 
646          }          }
647  }  }
648    
649    =head3 VerifyFields
650    
651    C<< my $count = $erdb->VerifyFields($relName, \@fieldList); >>
652    
653    Run through the list of proposed field values, insuring that all the character fields are
654    below the maximum length. If any fields are too long, they will be truncated in place.
655    
656    =over 4
657    
658    =item relName
659    
660    Name of the relation for which the specified fields are destined.
661    
662    =item fieldList
663    
664    Reference to a list, in order, of the fields to be put into the relation.
665    
666    =item RETURN
667    
668    Returns the number of fields truncated.
669    
670    =back
671    
672    =cut
673    
674    sub VerifyFields {
675        # Get the parameters.
676        my ($self, $relName, $fieldList) = @_;
677        # Initialize the return value.
678        my $retVal = 0;
679        # Get the relation definition.
680        my $relData = $self->_FindRelation($relName);
681        # Get the list of field descriptors.
682        my $fieldTypes = $relData->{Fields};
683        my $fieldCount = scalar @{$fieldTypes};
684        # Loop through the two lists.
685        for (my $i = 0; $i < $fieldCount; $i++) {
686            # Get the type of the current field.
687            my $fieldType = $fieldTypes->[$i]->{type};
688            # If it's a character field, verify the length.
689            if ($fieldType =~ /string/) {
690                my $maxLen = $TypeTable{$fieldType}->{maxLen};
691                my $oldString = $fieldList->[$i];
692                if (length($oldString) > $maxLen) {
693                    # Here it's too big, so we truncate it.
694                    Trace("Truncating field $i in relation $relName to $maxLen characters from \"$oldString\".") if T(1);
695                    $fieldList->[$i] = substr $oldString, 0, $maxLen;
696                    $retVal++;
697                }
698            }
699        }
700        # Return the truncation count.
701        return $retVal;
702    }
703    
704    =head3 DigestFields
705    
706    C<< $erdb->DigestFields($relName, $fieldList); >>
707    
708    Digest the strings in the field list that correspond to data type C<hash-string> in the
709    specified relation.
710    
711    =over 4
712    
713    =item relName
714    
715    Name of the relation to which the fields belong.
716    
717    =item fieldList
718    
719    List of field contents to be loaded into the relation.
720    
721    =back
722    
723    =cut
724    #: Return Type ;
725    sub DigestFields {
726        # Get the parameters.
727        my ($self, $relName, $fieldList) = @_;
728        # Get the relation definition.
729        my $relData = $self->_FindRelation($relName);
730        # Get the list of field descriptors.
731        my $fieldTypes = $relData->{Fields};
732        my $fieldCount = scalar @{$fieldTypes};
733        # Loop through the two lists.
734        for (my $i = 0; $i < $fieldCount; $i++) {
735            # Get the type of the current field.
736            my $fieldType = $fieldTypes->[$i]->{type};
737            # If it's a hash string, digest it in place.
738            if ($fieldType eq 'hash-string') {
739                $fieldList->[$i] = md5_base64($fieldList->[$i]);
740            }
741        }
742    }
743    
744  =head3 CreateIndex  =head3 CreateIndex
745    
746  C<< $database->CreateIndex($relationName); >>  C<< $erdb->CreateIndex($relationName); >>
747    
748  Create the indexes for a relation. If a table is being loaded from a large source file (as  Create the indexes for a relation. If a table is being loaded from a large source file (as
749  is the case in L</LoadTable>), it is best to create the indexes after the load. If that is  is the case in L</LoadTable>), it is sometimes best to create the indexes after the load.
750  the case, then L</CreateTable> should be called with the index flag set to FALSE, and this  If that is the case, then L</CreateTable> should be called with the index flag set to
751  method used after the load to create the indexes for the table.  FALSE, and this method used after the load to create the indexes for the table.
752    
753  =cut  =cut
754    
# Line 436  Line 769 
769                  # Get the index's uniqueness flag.                  # Get the index's uniqueness flag.
770                  my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');                  my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');
771                  # Create the index.                  # Create the index.
772                  $dbh->create_index(idx => $indexName, tbl => $relationName, flds => $flds, unique => $unique);          my $rv = $dbh->create_index(idx => $indexName, tbl => $relationName,
773                                        flds => $flds, unique => $unique);
774            if ($rv) {
775                  Trace("Index created: $indexName for $relationName ($flds)") if T(1);                  Trace("Index created: $indexName for $relationName ($flds)") if T(1);
776            } else {
777                Confess("Error creating index $indexName for $relationName using ($flds): " . $dbh->error_message());
778            }
779          }          }
780  }  }
781    
782  =head3 LoadTables  =head3 LoadTables
783    
784  C<< my $stats = $database->LoadTables($directoryName, $rebuild); >>  C<< my $stats = $erdb->LoadTables($directoryName, $rebuild); >>
785    
786  This method will load the database tables from a directory. The tables must already have been created  This method will load the database tables from a directory. The tables must already have been created
787  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;
# Line 486  Line 824 
824          $directoryName =~ s!/\\$!!;          $directoryName =~ s!/\\$!!;
825          # Declare the return variable.          # Declare the return variable.
826          my $retVal = Stats->new();          my $retVal = Stats->new();
827          # Get the metadata structure.      # Get the relation names.
828          my $metaData = $self->{_metaData};      my @relNames = $self->GetTableNames();
829          # Loop through the entities.      for my $relationName (@relNames) {
         for my $entity (values %{$metaData->{Entities}}) {  
                 # Loop through the entity's relations.  
                 for my $relationName (keys %{$entity->{Relations}}) {  
830                          # Try to load this relation.                          # Try to load this relation.
831                          my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);                          my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);
832                          # Accumulate the statistics.                          # Accumulate the statistics.
833                          $retVal->Accumulate($result);                          $retVal->Accumulate($result);
834                  }                  }
         }  
         # Loop through the relationships.  
         for my $relationshipName (keys %{$metaData->{Relationships}}) {  
                 # Try to load this relationship's relation.  
                 my $result = $self->_LoadRelation($directoryName, $relationshipName, $rebuild);  
                 # Accumulate the statistics.  
                 $retVal->Accumulate($result);  
         }  
835          # Add the duration of the load to the statistical object.          # Add the duration of the load to the statistical object.
836          $retVal->Add('duration', gettimeofday - $startTime);          $retVal->Add('duration', gettimeofday - $startTime);
837          # Return the accumulated statistics.          # Return the accumulated statistics.
838          return $retVal;          return $retVal;
839  }  }
840    
841    
842  =head3 GetTableNames  =head3 GetTableNames
843    
844  C<< my @names = $database->GetTableNames; >>  C<< my @names = $erdb->GetTableNames; >>
845    
846  Return a list of the relations required to implement this database.  Return a list of the relations required to implement this database.
847    
# Line 530  Line 858 
858    
859  =head3 GetEntityTypes  =head3 GetEntityTypes
860    
861  C<< my @names = $database->GetEntityTypes; >>  C<< my @names = $erdb->GetEntityTypes; >>
862    
863  Return a list of the entity type names.  Return a list of the entity type names.
864    
# Line 545  Line 873 
873          return sort keys %{$entityList};          return sort keys %{$entityList};
874  }  }
875    
876    =head3 IsEntity
877    
878    C<< my $flag = $erdb->IsEntity($entityName); >>
879    
880    Return TRUE if the parameter is an entity name, else FALSE.
881    
882    =over 4
883    
884    =item entityName
885    
886    Object name to be tested.
887    
888    =item RETURN
889    
890    Returns TRUE if the specified string is an entity name, else FALSE.
891    
892    =back
893    
894    =cut
895    
896    sub IsEntity {
897        # Get the parameters.
898        my ($self, $entityName) = @_;
899        # Test to see if it's an entity.
900        return exists $self->{_metaData}->{Entities}->{$entityName};
901    }
902    
903  =head3 Get  =head3 Get
904    
905  C<< my $query = $database->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my $query = $erdb->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
906    
907  This method returns a query object for entities of a specified type using a specified filter.  This method returns a query object for entities of a specified type using a specified filter.
908  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each
# Line 555  Line 910 
910  following call requests all B<Genome> objects for the genus specified in the variable  following call requests all B<Genome> objects for the genus specified in the variable
911  $genus.  $genus.
912    
913  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", $genus); >>
914    
915  The WHERE clause contains a single question mark, so there is a single additional  The WHERE clause contains a single question mark, so there is a single additional
916  parameter representing the parameter value. It would also be possible to code  parameter representing the parameter value. It would also be possible to code
917    
918  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>
919    
920  however, this version of the call would generate a syntax error if there were any quote  however, this version of the call would generate a syntax error if there were any quote
921  characters inside the variable C<$genus>.  characters inside the variable C<$genus>.
# Line 572  Line 927 
927  It is possible to specify multiple entity and relationship names in order to retrieve more than  It is possible to specify multiple entity and relationship names in order to retrieve more than
928  one object's data at the same time, which allows highly complex joined queries. For example,  one object's data at the same time, which allows highly complex joined queries. For example,
929    
930  C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>
931    
932  If multiple names are specified, then the query processor will automatically determine a  If multiple names are specified, then the query processor will automatically determine a
933  join path between the entities and relationships. The algorithm used is very simplistic.  join path between the entities and relationships. The algorithm used is very simplistic.
934  In particular, you can't specify any entity or relationship more than once, and if a  In particular, if a relationship is recursive, the path is determined by the order in which
935  relationship is recursive, the path is determined by the order in which the entity  the entity and the relationship appear. For example, consider a recursive relationship
936  and the relationship appear. For example, consider a recursive relationship B<IsParentOf>  B<IsParentOf> which relates B<People> objects to other B<People> objects. If the join path is
 which relates B<People> objects to other B<People> objects. If the join path is  
937  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,
938  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.
939    
940    If an entity or relationship is mentioned twice, the name for the second occurrence will
941    be suffixed with C<2>, the third occurrence will be suffixed with C<3>, and so forth. So,
942    for example, if we have C<['Feature', 'HasContig', 'Contig', 'HasContig']>, then the
943    B<to-link> field of the first B<HasContig> is specified as C<HasContig(to-link)>, while
944    the B<to-link> field of the second B<HasContig> is specified as C<HasContig2(to-link)>.
945    
946  =over 4  =over 4
947    
948  =item objectNames  =item objectNames
# Line 605  Line 965 
965    
966  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>
967    
968    Note that the case is important. Only an uppercase "ORDER BY" with a single space will
969    be processed. The idea is to make it less likely to find the verb by accident.
970    
971  The rules for field references in a sort order are the same as those for field references in the  The rules for field references in a sort order are the same as those for field references in the
972  filter clause in general; however, odd things may happen if a sort field is from a secondary  filter clause in general; however, odd things may happen if a sort field is from a secondary
973  relation.  relation.
974    
975    Finally, you can limit the number of rows returned by adding a LIMIT clause. The LIMIT must
976    be the last thing in the filter clause, and it contains only the word "LIMIT" followed by
977    a positive number. So, for example
978    
979    C<< "Genome(genus) = ? ORDER BY Genome(species) LIMIT 10" >>
980    
981    will only return the first ten genomes for the specified genus. The ORDER BY clause is not
982    required. For example, to just get the first 10 genomes in the B<Genome> table, you could
983    use
984    
985    C<< "LIMIT 10" >>
986    
987  =item param1, param2, ..., paramN  =item param1, param2, ..., paramN
988    
989  Parameter values to be substituted into the filter clause.  Parameter values to be substituted into the filter clause.
# Line 624  Line 999 
999  sub Get {  sub Get {
1000          # Get the parameters.          # Get the parameters.
1001          my ($self, $objectNames, $filterClause, @params) = @_;          my ($self, $objectNames, $filterClause, @params) = @_;
1002        # Adjust the list of object names to account for multiple occurrences of the
1003        # same object. We start with a hash table keyed on object name that will
1004        # return the object suffix. The first time an object is encountered it will
1005        # not be found in the hash. The next time the hash will map the object name
1006        # to 2, then 3, and so forth.
1007        my %objectHash = ();
1008        # This list will contain the object names as they are to appear in the
1009        # FROM list.
1010        my @fromList = ();
1011        # This list contains the suffixed object name for each object. It is exactly
1012        # parallel to the list in the $objectNames parameter.
1013        my @mappedNameList = ();
1014        # Finally, this hash translates from a mapped name to its original object name.
1015        my %mappedNameHash = ();
1016        # Now we create the lists. Note that for every single name we push something into
1017        # @fromList and @mappedNameList. This insures that those two arrays are exactly
1018        # parallel to $objectNames.
1019        for my $objectName (@{$objectNames}) {
1020            # Get the next suffix for this object.
1021            my $suffix = $objectHash{$objectName};
1022            if (! $suffix) {
1023                # Here we are seeing the object for the first time. The object name
1024                # is used as is.
1025                push @mappedNameList, $objectName;
1026                push @fromList, $objectName;
1027                $mappedNameHash{$objectName} = $objectName;
1028                # Denote the next suffix will be 2.
1029                $objectHash{$objectName} = 2;
1030            } else {
1031                # Here we've seen the object before. We construct a new name using
1032                # the suffix from the hash and update the hash.
1033                my $mappedName = "$objectName$suffix";
1034                $objectHash{$objectName} = $suffix + 1;
1035                # The FROM list has the object name followed by the mapped name. This
1036                # tells SQL it's still the same table, but we're using a different name
1037                # for it to avoid confusion.
1038                push @fromList, "$objectName $mappedName";
1039                # The mapped-name list contains the real mapped name.
1040                push @mappedNameList, $mappedName;
1041                # Finally, enable us to get back from the mapped name to the object name.
1042                $mappedNameHash{$mappedName} = $objectName;
1043            }
1044        }
1045          # Construct the SELECT statement. The general pattern is          # Construct the SELECT statement. The general pattern is
1046          #          #
1047          # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN          # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN
1048          #          #
1049          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
1050          my $command = "SELECT DISTINCT " . join('.*, ', @{$objectNames}) . ".* FROM " .      my $command = "SELECT DISTINCT " . join('.*, ', @mappedNameList) . ".* FROM " .
1051                                  join(', ', @{$objectNames});                  join(', ', @fromList);
1052          # Check for a filter clause.          # Check for a filter clause.
1053          if ($filterClause) {          if ($filterClause) {
1054                  # Here we have one, so we convert its field names and add it to the query. First,                  # Here we have one, so we convert its field names and add it to the query. First,
# Line 638  Line 1056 
1056                  my $filterString = $filterClause;                  my $filterString = $filterClause;
1057                  # Next, we sort the object names by length. This helps protect us from finding                  # Next, we sort the object names by length. This helps protect us from finding
1058                  # object names inside other object names when we're doing our search and replace.                  # object names inside other object names when we're doing our search and replace.
1059                  my @sortedNames = sort { length($b) - length($a) } @{$objectNames};          my @sortedNames = sort { length($b) - length($a) } @mappedNameList;
1060                  # We will also keep a list of conditions to add to the WHERE clause in order to link                  # We will also keep a list of conditions to add to the WHERE clause in order to link
1061                  # entities and relationships as well as primary relations to secondary ones.                  # entities and relationships as well as primary relations to secondary ones.
1062                  my @joinWhere = ();                  my @joinWhere = ();
1063                  # The final preparatory step is to create a hash table of relation names. The                  # The final preparatory step is to create a hash table of relation names. The
1064                  # table begins with the relation names already in the SELECT command.          # table begins with the relation names already in the SELECT command. We may
1065                  my %fromNames = ();          # need to add relations later if there is filtering on a field in a secondary
1066                  for my $objectName (@sortedNames) {          # relation. The secondary relations are the ones that contain multiply-
1067                          $fromNames{$objectName} = 1;          # occurring or optional fields.
1068                  }          my %fromNames = map { $_ => 1 } @sortedNames;
1069                  # We are ready to begin. We loop through the object names, replacing each                  # We are ready to begin. We loop through the object names, replacing each
1070                  # object name's field references by the corresponding SQL field reference.                  # object name's field references by the corresponding SQL field reference.
1071                  # Along the way, if we find a secondary relation, we will need to add it                  # Along the way, if we find a secondary relation, we will need to add it
1072                  # to the FROM clause.                  # to the FROM clause.
1073                  for my $objectName (@sortedNames) {          for my $mappedName (@sortedNames) {
1074                          # Get the length of the object name plus 2. This is the value we add to the                          # Get the length of the object name plus 2. This is the value we add to the
1075                          # size of the field name to determine the size of the field reference as a                          # size of the field name to determine the size of the field reference as a
1076                          # whole.                          # whole.
1077                          my $nameLength = 2 + length $objectName;              my $nameLength = 2 + length $mappedName;
1078                # Get the real object name for this mapped name.
1079                my $objectName = $mappedNameHash{$mappedName};
1080                Trace("Processing $mappedName for object $objectName.") if T(4);
1081                          # Get the object's field list.                          # Get the object's field list.
1082                          my $fieldList = $self->_GetFieldTable($objectName);              my $fieldList = $self->GetFieldTable($objectName);
1083                          # Find the field references for this object.                          # Find the field references for this object.
1084                          while ($filterString =~ m/$objectName\(([^)]*)\)/g) {              while ($filterString =~ m/$mappedName\(([^)]*)\)/g) {
1085                                  # At this point, $1 contains the field name, and the current position                                  # At this point, $1 contains the field name, and the current position
1086                                  # is set immediately after the final parenthesis. We pull out the name of                                  # is set immediately after the final parenthesis. We pull out the name of
1087                                  # the field and the position and length of the field reference as a whole.                                  # the field and the position and length of the field reference as a whole.
# Line 671  Line 1092 
1092                                  if (!exists $fieldList->{$fieldName}) {                                  if (!exists $fieldList->{$fieldName}) {
1093                                          Confess("Field $fieldName not found for object $objectName.");                                          Confess("Field $fieldName not found for object $objectName.");
1094                                  } else {                                  } else {
1095                        Trace("Processing $fieldName at position $pos.") if T(4);
1096                                          # Get the field's relation.                                          # Get the field's relation.
1097                                          my $relationName = $fieldList->{$fieldName}->{relation};                                          my $relationName = $fieldList->{$fieldName}->{relation};
1098                        # Now we have a secondary relation. We need to insure it matches the
1099                        # mapped name of the primary relation. First we peel off the suffix
1100                        # from the mapped name.
1101                        my $mappingSuffix = substr $mappedName, length($objectName);
1102                        # Put the mapping suffix onto the relation name to get the
1103                        # mapped relation name.
1104                        my $mappedRelationName = "$relationName$mappingSuffix";
1105                                          # Insure the relation is in the FROM clause.                                          # Insure the relation is in the FROM clause.
1106                                          if (!exists $fromNames{$relationName}) {                      if (!exists $fromNames{$mappedRelationName}) {
1107                                                  # Add the relation to the FROM clause.                                                  # Add the relation to the FROM clause.
1108                            if ($mappedRelationName eq $relationName) {
1109                                # The name is un-mapped, so we add it without
1110                                # any frills.
1111                                                  $command .= ", $relationName";                                                  $command .= ", $relationName";
                                                 # Create its join sub-clause.  
1112                                                  push @joinWhere, "$objectName.id = $relationName.id";                                                  push @joinWhere, "$objectName.id = $relationName.id";
1113                                                  # Denote we have it available for future fields.                          } else {
1114                                                  $fromNames{$relationName} = 1;                              # Here we have a mapping situation.
1115                                $command .= ", $relationName $mappedRelationName";
1116                                push @joinWhere, "$mappedRelationName.id = $mappedName.id";
1117                            }
1118                            # Denote we have this relation available for future fields.
1119                            $fromNames{$mappedRelationName} = 1;
1120                                          }                                          }
1121                                          # Form an SQL field reference from the relation name and the field name.                                          # Form an SQL field reference from the relation name and the field name.
1122                                          my $sqlReference = "$relationName." . _FixName($fieldName);                      my $sqlReference = "$mappedRelationName." . _FixName($fieldName);
1123                                          # Put it into the filter string in place of the old value.                                          # Put it into the filter string in place of the old value.
1124                                          substr($filterString, $pos, $len) = $sqlReference;                                          substr($filterString, $pos, $len) = $sqlReference;
1125                                          # Reposition the search.                                          # Reposition the search.
# Line 695  Line 1131 
1131                  # is more than one object in the object list. We start with the first object and                  # is more than one object in the object list. We start with the first object and
1132                  # run through the objects after it. Note also that we make a safety copy of the                  # run through the objects after it. Note also that we make a safety copy of the
1133                  # list before running through it.                  # list before running through it.
1134                  my @objectList = @{$objectNames};          my @mappedObjectList = @mappedNameList;
1135                  my $lastObject = shift @objectList;          my $lastMappedObject = shift @mappedObjectList;
1136                  # Get the join table.                  # Get the join table.
1137                  my $joinTable = $self->{_metaData}->{Joins};                  my $joinTable = $self->{_metaData}->{Joins};
1138                  # Loop through the object list.                  # Loop through the object list.
1139                  for my $thisObject (@objectList) {          for my $thisMappedObject (@mappedObjectList) {
1140                          # Look for a join.              # Look for a join using the real object names.
1141                my $lastObject = $mappedNameHash{$lastMappedObject};
1142                my $thisObject = $mappedNameHash{$thisMappedObject};
1143                          my $joinKey = "$lastObject/$thisObject";                          my $joinKey = "$lastObject/$thisObject";
1144                          if (!exists $joinTable->{$joinKey}) {                          if (!exists $joinTable->{$joinKey}) {
1145                                  # Here there's no join, so we throw an error.                                  # Here there's no join, so we throw an error.
1146                                  Confess("No join exists to connect from $lastObject to $thisObject.");                  Confess("No join exists to connect from $lastMappedObject to $thisMappedObject.");
1147                          } else {                          } else {
1148                                  # Get the join clause and add it to the WHERE list.                  # Get the join clause.
1149                                  push @joinWhere, $joinTable->{$joinKey};                  my $unMappedJoin = $joinTable->{$joinKey};
1150                    # Fix the names.
1151                    $unMappedJoin =~ s/$lastObject/$lastMappedObject/;
1152                    $unMappedJoin =~ s/$thisObject/$thisMappedObject/;
1153                    push @joinWhere, $unMappedJoin;
1154                                  # Save this object as the last object for the next iteration.                                  # Save this object as the last object for the next iteration.
1155                                  $lastObject = $thisObject;                  $lastMappedObject = $thisMappedObject;
1156                          }                          }
1157                  }                  }
1158                  # Now we need to handle the whole ORDER BY thing. We'll put the order by clause          # Now we need to handle the whole ORDER BY / LIMIT thing. The important part
1159                  # in the following variable.          # here is we want the filter clause to be empty if there's no WHERE filter.
1160            # We'll put the ORDER BY / LIMIT clauses in the following variable.
1161                  my $orderClause = "";                  my $orderClause = "";
1162                  # Locate the ORDER BY verb (if any).          # Locate the ORDER BY or LIMIT verbs (if any). We use a non-greedy
1163                  if ($filterString =~ m/^(.*)ORDER BY/g) {          # operator so that we find the first occurrence of either verb.
1164                          # Here we have an ORDER BY verb. Split it off of the filter string.          if ($filterString =~ m/^(.*?)\s*(ORDER BY|LIMIT)/g) {
1165                # Here we have an ORDER BY or LIMIT verb. Split it off of the filter string.
1166                          my $pos = pos $filterString;                          my $pos = pos $filterString;
1167                          $orderClause = substr($filterString, $pos);              $orderClause = $2 . substr($filterString, $pos);
1168                          $filterString = $1;                          $filterString = $1;
1169                  }                  }
1170                  # Add the filter and the join clauses (if any) to the SELECT command.                  # Add the filter and the join clauses (if any) to the SELECT command.
1171                  if ($filterString) {                  if ($filterString) {
1172                Trace("Filter string is \"$filterString\".") if T(4);
1173                          push @joinWhere, "($filterString)";                          push @joinWhere, "($filterString)";
1174                  }                  }
1175                  if (@joinWhere) {                  if (@joinWhere) {
1176                          $command .= " WHERE " . join(' AND ', @joinWhere);                          $command .= " WHERE " . join(' AND ', @joinWhere);
1177                  }                  }
1178                  # Add the sort clause (if any) to the SELECT command.          # Add the sort or limit clause (if any) to the SELECT command.
1179                  if ($orderClause) {                  if ($orderClause) {
1180                          $command .= " ORDER BY $orderClause";              $command .= " $orderClause";
1181                  }                  }
1182          }          }
1183          Trace("SQL query: $command") if T(2);      Trace("SQL query: $command") if T(SQL => 3);
1184          Trace("PARMS: '" . (join "', '", @params) . "'") if (T(3) && (@params > 0));      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(SQL => 4) && (@params > 0));
1185          my $sth = $dbh->prepare_command($command);          my $sth = $dbh->prepare_command($command);
1186          # Execute it with the parameters bound in.          # Execute it with the parameters bound in.
1187          $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());          $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());
1188        # Now we create the relation map, which enables DBQuery to determine the order, name
1189        # and mapped name for each object in the query.
1190        my @relationMap = ();
1191        for my $mappedName (@mappedNameList) {
1192            push @relationMap, [$mappedName, $mappedNameHash{$mappedName}];
1193        }
1194          # Return the statement object.          # Return the statement object.
1195          my $retVal = DBQuery::_new($self, $sth, @{$objectNames});      my $retVal = DBQuery::_new($self, $sth, \@relationMap);
1196        return $retVal;
1197    }
1198    
1199    =head3 Delete
1200    
1201    C<< my $stats = $erdb->Delete($entityName, $objectID); >>
1202    
1203    Delete an entity instance from the database. The instance is deleted along with all entity and
1204    relationship instances dependent on it. The idea of dependence here is recursive. An object is
1205    always dependent on itself. An object is dependent if it is a 1-to-many or many-to-many
1206    relationship connected to a dependent entity or the "to" entity connected to a 1-to-many
1207    dependent relationship.
1208    
1209    =over 4
1210    
1211    =item entityName
1212    
1213    Name of the entity type for the instance being deleted.
1214    
1215    =item objectID
1216    
1217    ID of the entity instance to be deleted. If the ID contains a wild card character (C<%>),
1218    then it is presumed to by a LIKE pattern.
1219    
1220    =item testFlag
1221    
1222    If TRUE, the delete statements will be traced without being executed.
1223    
1224    =item RETURN
1225    
1226    Returns a statistics object indicating how many records of each particular table were
1227    deleted.
1228    
1229    =back
1230    
1231    =cut
1232    #: Return Type $%;
1233    sub Delete {
1234        # Get the parameters.
1235        my ($self, $entityName, $objectID, $testFlag) = @_;
1236        # Declare the return variable.
1237        my $retVal = Stats->new();
1238        # Get the DBKernel object.
1239        my $db = $self->{_dbh};
1240        # We're going to generate all the paths branching out from the starting entity. One of
1241        # the things we have to be careful about is preventing loops. We'll use a hash to
1242        # determine if we've hit a loop.
1243        my %alreadyFound = ();
1244        # These next lists will serve as our result stack. We start by pushing object lists onto
1245        # the stack, and then popping them off to do the deletes. This means the deletes will
1246        # start with the longer paths before getting to the shorter ones. That, in turn, makes
1247        # sure we don't delete records that might be needed to forge relationships back to the
1248        # original item. We have two lists-- one for TO-relationships, and one for
1249        # FROM-relationships and entities.
1250        my @fromPathList = ();
1251        my @toPathList = ();
1252        # This final hash is used to remember what work still needs to be done. We push paths
1253        # onto the list, then pop them off to extend the paths. We prime it with the starting
1254        # point. Note that we will work hard to insure that the last item on a path in the
1255        # TODO list is always an entity.
1256        my @todoList = ([$entityName]);
1257        while (@todoList) {
1258            # Get the current path.
1259            my $current = pop @todoList;
1260            # Copy it into a list.
1261            my @stackedPath = @{$current};
1262            # Pull off the last item on the path. It will always be an entity.
1263            my $entityName = pop @stackedPath;
1264            # Add it to the alreadyFound list.
1265            $alreadyFound{$entityName} = 1;
1266            # Get the entity data.
1267            my $entityData = $self->_GetStructure($entityName);
1268            # The first task is to loop through the entity's relation. A DELETE command will
1269            # be needed for each of them.
1270            my $relations = $entityData->{Relations};
1271            for my $relation (keys %{$relations}) {
1272                my @augmentedList = (@stackedPath, $relation);
1273                push @fromPathList, \@augmentedList;
1274            }
1275            # Now we need to look for relationships connected to this entity.
1276            my $relationshipList = $self->{_metaData}->{Relationships};
1277            for my $relationshipName (keys %{$relationshipList}) {
1278                my $relationship = $relationshipList->{$relationshipName};
1279                # Check the FROM field. We're only interested if it's us.
1280                if ($relationship->{from} eq $entityName) {
1281                    # Add the path to this relationship.
1282                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1283                    push @fromPathList, \@augmentedList;
1284                    # Check the arity. If it's MM we're done. If it's 1M
1285                    # and the target hasn't been seen yet, we want to
1286                    # stack the entity for future processing.
1287                    if ($relationship->{arity} eq '1M') {
1288                        my $toEntity = $relationship->{to};
1289                        if (! exists $alreadyFound{$toEntity}) {
1290                            # Here we have a new entity that's dependent on
1291                            # the current entity, so we need to stack it.
1292                            my @stackList = (@augmentedList, $toEntity);
1293                            push @fromPathList, \@stackList;
1294                        } else {
1295                            Trace("$toEntity ignored because it occurred previously.") if T(4);
1296                        }
1297                    }
1298                }
1299                # Now check the TO field. In this case only the relationship needs
1300                # deletion.
1301                if ($relationship->{to} eq $entityName) {
1302                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1303                    push @toPathList, \@augmentedList;
1304                }
1305            }
1306        }
1307        # Create the first qualifier for the WHERE clause. This selects the
1308        # keys of the primary entity records to be deleted. When we're deleting
1309        # from a dependent table, we construct a join page from the first qualifier
1310        # to the table containing the dependent records to delete.
1311        my $qualifier = ($objectID =~ /%/ ? "LIKE ?" : "= ?");
1312        # We need to make two passes. The first is through the to-list, and
1313        # the second through the from-list. The from-list is second because
1314        # the to-list may need to pass through some of the entities the
1315        # from-list would delete.
1316        my %stackList = ( from_link => \@fromPathList, to_link => \@toPathList );
1317        # Now it's time to do the deletes. We do it in two passes.
1318        for my $keyName ('to_link', 'from_link') {
1319            # Get the list for this key.
1320            my @pathList = @{$stackList{$keyName}};
1321            Trace(scalar(@pathList) . " entries in path list for $keyName.") if T(3);
1322            # Loop through this list.
1323            while (my $path = pop @pathList) {
1324                # Get the table whose rows are to be deleted.
1325                my @pathTables = @{$path};
1326                # Start the DELETE statement. We need to call DBKernel because the
1327                # syntax of a DELETE-USING varies among DBMSs.
1328                my $target = $pathTables[$#pathTables];
1329                my $stmt = $db->SetUsing(@pathTables);
1330                # Now start the WHERE. The first thing is the ID field from the starting table. That
1331                # starting table will either be the entity relation or one of the entity's
1332                # sub-relations.
1333                $stmt .= " WHERE $pathTables[0].id $qualifier";
1334                # Now we run through the remaining entities in the path, connecting them up.
1335                for (my $i = 1; $i <= $#pathTables; $i += 2) {
1336                    # Connect the current relationship to the preceding entity.
1337                    my ($entity, $rel) = @pathTables[$i-1,$i];
1338                    # The style of connection depends on the direction of the relationship.
1339                    $stmt .= " AND $entity.id = $rel.$keyName";
1340                    if ($i + 1 <= $#pathTables) {
1341                        # Here there's a next entity, so connect that to the relationship's
1342                        # to-link.
1343                        my $entity2 = $pathTables[$i+1];
1344                        $stmt .= " AND $rel.to_link = $entity2.id";
1345                    }
1346                }
1347                # Now we have our desired DELETE statement.
1348                if ($testFlag) {
1349                    # Here the user wants to trace without executing.
1350                    Trace($stmt) if T(0);
1351                } else {
1352                    # Here we can delete. Note that the SQL method dies with a confessing
1353                    # if an error occurs, so we just go ahead and do it.
1354                    Trace("Executing delete from $target using '$objectID'.") if T(3);
1355                    my $rv = $db->SQL($stmt, 0, $objectID);
1356                    # Accumulate the statistics for this delete. The only rows deleted
1357                    # are from the target table, so we use its name to record the
1358                    # statistic.
1359                    $retVal->Add($target, $rv);
1360                }
1361            }
1362        }
1363        # Return the result.
1364          return $retVal;          return $retVal;
1365  }  }
1366    
1367  =head3 GetList  =head3 GetList
1368    
1369  C<< my @dbObjects = $database->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
1370    
1371  Return a list of object descriptors for the specified objects as determined by the  Return a list of object descriptors for the specified objects as determined by the
1372  specified filter clause.  specified filter clause.
1373    
1374  This method is essentially the same as L</Get> except it returns a list of objects rather  This method is essentially the same as L</Get> except it returns a list of objects rather
1375  that a query object that can be used to get the results one record at a time.  than a query object that can be used to get the results one record at a time.
   
 =over 4  
1376    
1377  =over 4  =over 4
1378    
# Line 812  Line 1429 
1429    
1430  =head3 ComputeObjectSentence  =head3 ComputeObjectSentence
1431    
1432  C<< my $sentence = $database->ComputeObjectSentence($objectName); >>  C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >>
1433    
1434  Check an object name, and if it is a relationship convert it to a relationship sentence.  Check an object name, and if it is a relationship convert it to a relationship sentence.
1435    
# Line 847  Line 1464 
1464    
1465  =head3 DumpRelations  =head3 DumpRelations
1466    
1467  C<< $database->DumpRelations($outputDirectory); >>  C<< $erdb->DumpRelations($outputDirectory); >>
1468    
1469  Write the contents of all the relations to tab-delimited files in the specified directory.  Write the contents of all the relations to tab-delimited files in the specified directory.
1470  Each file will have the same name as the relation dumped, with an extension of DTX.  Each file will have the same name as the relation dumped, with an extension of DTX.
# Line 889  Line 1506 
1506    
1507  =head3 InsertObject  =head3 InsertObject
1508    
1509  C<< my $ok = $database->InsertObject($objectType, \%fieldHash); >>  C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >>
1510    
1511  Insert an object into the database. The object is defined by a type name and then a hash  Insert an object into the database. The object is defined by a type name and then a hash
1512  of field names to values. Field values in the primary relation are represented by scalars.  of field names to values. Field values in the primary relation are represented by scalars.
# Line 898  Line 1515 
1515  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases
1516  C<ZP_00210270.1> and C<gi|46206278>.  C<ZP_00210270.1> and C<gi|46206278>.
1517    
1518  C<< $database->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>  C<< $erdb->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>
1519    
1520  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and
1521  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.
1522    
1523  C<< $database->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>  C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>
1524    
1525  =over 4  =over 4
1526    
# Line 1028  Line 1645 
1645    
1646  =head3 LoadTable  =head3 LoadTable
1647    
1648  C<< my %results = $database->LoadTable($fileName, $relationName, $truncateFlag); >>  C<< my %results = $erdb->LoadTable($fileName, $relationName, $truncateFlag); >>
1649    
1650  Load data from a tab-delimited file into a specified table, optionally re-creating the table first.  Load data from a tab-delimited file into a specified table, optionally re-creating the table
1651    first.
1652    
1653  =over 4  =over 4
1654    
# Line 1048  Line 1666 
1666    
1667  =item RETURN  =item RETURN
1668    
1669  Returns a statistical object containing the number of records read and a list of the error messages.  Returns a statistical object containing a list of the error messages.
1670    
1671  =back  =back
1672    
# Line 1059  Line 1677 
1677          # Create the statistical return object.          # Create the statistical return object.
1678          my $retVal = _GetLoadStats();          my $retVal = _GetLoadStats();
1679          # Trace the fact of the load.          # Trace the fact of the load.
1680          Trace("Loading table $relationName from $fileName") if T(1);      Trace("Loading table $relationName from $fileName") if T(2);
1681          # Get the database handle.          # Get the database handle.
1682          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
1683        # Get the input file size.
1684        my $fileSize = -s $fileName;
1685          # Get the relation data.          # Get the relation data.
1686          my $relation = $self->_FindRelation($relationName);          my $relation = $self->_FindRelation($relationName);
1687          # Check the truncation flag.          # Check the truncation flag.
1688          if ($truncateFlag) {          if ($truncateFlag) {
1689                  Trace("Creating table $relationName") if T(1);          Trace("Creating table $relationName") if T(2);
1690            # Compute the row count estimate. We take the size of the load file,
1691            # divide it by the estimated row size, and then multiply by 1.5 to
1692            # leave extra room. We postulate a minimum row count of 1000 to
1693            # prevent problems with incoming empty load files.
1694            my $rowSize = $self->EstimateRowSize($relationName);
1695            my $estimate = FIG::max($fileSize * 1.5 / $rowSize, 1000);
1696                  # Re-create the table without its index.                  # Re-create the table without its index.
1697                  $self->CreateTable($relationName, 0);          $self->CreateTable($relationName, 0, $estimate);
1698            # If this is a pre-index DBMS, create the index here.
1699            if ($dbh->{_preIndex}) {
1700                eval {
1701                    $self->CreateIndex($relationName);
1702                };
1703                if ($@) {
1704                    $retVal->AddMessage($@);
1705                }
1706            }
1707          }          }
         # Determine whether or not this is a primary relation. Primary relations have an extra  
         # field indicating whether or not a given object is new or was loaded from the flat files.  
         my $primary = $self->_IsPrimary($relationName);  
         # Get the number of fields in this relation.  
         my @fieldList = @{$relation->{Fields}};  
         my $fieldCount = @fieldList;  
         # Record the number of expected fields.  
         my $expectedFields = $fieldCount + ($primary ? 1 : 0);  
         # Start a database transaction.  
         $dbh->begin_tran;  
         # Open the relation file. We need to create a cleaned-up copy before loading.  
         open TABLEIN, '<', $fileName;  
         my $tempName = "$fileName.tbl";  
         open TABLEOUT, '>', $tempName;  
         # Loop through the file.  
         while (<TABLEIN>) {  
                 # Chop off the new-line character.  
                 my $record = $_;  
                 chomp $record;  
         # Only proceed if the record is non-blank.  
         if ($record) {  
             # Escape all the backslashes found in the line.  
             $record =~ s/\\/\\\\/g;  
             # Eliminate any trailing tabs.  
             chop $record while substr($record, -1) eq "\t";  
             # If this is a primary relation, add a 0 for the new-record flag (indicating that  
             # this record is not new, but part of the original load).  
             if ($primary) {  
                 $record .= "\t0";  
             }  
             # Write the record.  
             print TABLEOUT "$record\n";  
             # Count the record read.  
             my $count = $retVal->Add('records');  
             my $len = length $record;  
             Trace("Record $count written with $len characters.") if T(4);  
         }  
         }  
         # Close the files.  
         close TABLEIN;  
         close TABLEOUT;  
     Trace("Temporary file $tempName created.") if T(4);  
1708      # Load the table.      # Load the table.
1709          my $rv;          my $rv;
1710          eval {          eval {
1711                  $rv = $dbh->load_table(file => $tempName, tbl => $relationName);          $rv = $dbh->load_table(file => $fileName, tbl => $relationName);
1712          };          };
1713          if (!defined $rv) {          if (!defined $rv) {
1714          $retVal->AddMessage($@) if ($@);          $retVal->AddMessage($@) if ($@);
1715          $retVal->AddMessage("Table load failed for $relationName using $tempName.");          $retVal->AddMessage("Table load failed for $relationName using $fileName.");
1716                  Trace("Table load failed for $relationName.") if T(1);                  Trace("Table load failed for $relationName.") if T(1);
1717          } else {          } else {
1718                  # Here we successfully loaded the table. Trace the number of records loaded.          # Here we successfully loaded the table.
1719                  Trace("$retVal->{records} records read for $relationName.") if T(1);          $retVal->Add("tables");
1720            my $size = -s $fileName;
1721            Trace("$size bytes loaded into $relationName.") if T(2);
1722                  # If we're rebuilding, we need to create the table indexes.                  # If we're rebuilding, we need to create the table indexes.
1723                  if ($truncateFlag) {          if ($truncateFlag && ! $dbh->{_preIndex}) {
1724                          eval {                          eval {
1725                                  $self->CreateIndex($relationName);                                  $self->CreateIndex($relationName);
1726                          };                          };
# Line 1134  Line 1729 
1729                          }                          }
1730                  }                  }
1731          }          }
1732          # Commit the database changes.      # Analyze the table to improve performance.
1733          $dbh->commit_tran;      $dbh->vacuum_it($relationName);
         # Delete the temporary file.  
         unlink $tempName;  
1734          # Return the statistics.          # Return the statistics.
1735          return $retVal;          return $retVal;
1736  }  }
1737    
1738  =head3 GenerateEntity  =head3 GenerateEntity
1739    
1740  C<< my $fieldHash = $database->GenerateEntity($id, $type, \%values); >>  C<< my $fieldHash = $erdb->GenerateEntity($id, $type, \%values); >>
1741    
1742  Generate the data for a new entity instance. This method creates a field hash suitable for  Generate the data for a new entity instance. This method creates a field hash suitable for
1743  passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest  passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest
# Line 1202  Line 1795 
1795    
1796  =head3 GetEntity  =head3 GetEntity
1797    
1798  C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >>  C<< my $entityObject = $erdb->GetEntity($entityType, $ID); >>
1799    
1800  Return an object describing the entity instance with a specified ID.  Return an object describing the entity instance with a specified ID.
1801    
# Line 1238  Line 1831 
1831    
1832  =head3 GetEntityValues  =head3 GetEntityValues
1833    
1834  C<< my @values = GetEntityValues($entityType, $ID, \@fields); >>  C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >>
1835    
1836  Return a list of values from a specified entity instance.  Return a list of values from a specified entity instance.
1837    
# Line 1279  Line 1872 
1872          return @retVal;          return @retVal;
1873  }  }
1874    
1875    =head3 GetAll
1876    
1877    C<< my @list = $erdb->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>
1878    
1879    Return a list of values taken from the objects returned by a query. The first three
1880    parameters correspond to the parameters of the L</Get> method. The final parameter is
1881    a list of the fields desired from each record found by the query. The field name
1882    syntax is the standard syntax used for fields in the B<ERDB> system--
1883    B<I<objectName>(I<fieldName>)>-- where I<objectName> is the name of the relevant entity
1884    or relationship and I<fieldName> is the name of the field.
1885    
1886    The list returned will be a list of lists. Each element of the list will contain
1887    the values returned for the fields specified in the fourth parameter. If one of the
1888    fields specified returns multiple values, they are flattened in with the rest. For
1889    example, the following call will return a list of the features in a particular
1890    spreadsheet cell, and each feature will be represented by a list containing the
1891    feature ID followed by all of its aliases.
1892    
1893    C<< $query = $erdb->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>
1894    
1895    =over 4
1896    
1897    =item objectNames
1898    
1899    List containing the names of the entity and relationship objects to be retrieved.
1900    
1901    =item filterClause
1902    
1903    WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can
1904    be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form
1905    B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the
1906    parameter list as additional parameters. The fields in a filter clause can come from primary
1907    entity relations, relationship relations, or secondary entity relations; however, all of the
1908    entities and relationships involved must be included in the list of object names.
1909    
1910    =item parameterList
1911    
1912    List of the parameters to be substituted in for the parameters marks in the filter clause.
1913    
1914    =item fields
1915    
1916    List of the fields to be returned in each element of the list returned.
1917    
1918    =item count
1919    
1920    Maximum number of records to return. If omitted or 0, all available records will be returned.
1921    
1922    =item RETURN
1923    
1924    Returns a list of list references. Each element of the return list contains the values for the
1925    fields specified in the B<fields> parameter.
1926    
1927    =back
1928    
1929    =cut
1930    #: Return Type @@;
1931    sub GetAll {
1932        # Get the parameters.
1933        my ($self, $objectNames, $filterClause, $parameterList, $fields, $count) = @_;
1934        # Translate the parameters from a list reference to a list. If the parameter
1935        # list is a scalar we convert it into a singleton list.
1936        my @parmList = ();
1937        if (ref $parameterList eq "ARRAY") {
1938            @parmList = @{$parameterList};
1939        } else {
1940            push @parmList, $parameterList;
1941        }
1942        # Insure the counter has a value.
1943        if (!defined $count) {
1944            $count = 0;
1945        }
1946        # Add the row limit to the filter clause.
1947        if ($count > 0) {
1948            $filterClause .= " LIMIT $count";
1949        }
1950        # Create the query.
1951        my $query = $self->Get($objectNames, $filterClause, @parmList);
1952        # Set up a counter of the number of records read.
1953        my $fetched = 0;
1954        # Loop through the records returned, extracting the fields. Note that if the
1955        # counter is non-zero, we stop when the number of records read hits the count.
1956        my @retVal = ();
1957        while (($count == 0 || $fetched < $count) && (my $row = $query->Fetch())) {
1958            my @rowData = $row->Values($fields);
1959            push @retVal, \@rowData;
1960            $fetched++;
1961        }
1962        # Return the resulting list.
1963        return @retVal;
1964    }
1965    
1966    =head3 EstimateRowSize
1967    
1968    C<< my $rowSize = $erdb->EstimateRowSize($relName); >>
1969    
1970    Estimate the row size of the specified relation. The estimated row size is computed by adding
1971    up the average length for each data type.
1972    
1973    =over 4
1974    
1975    =item relName
1976    
1977    Name of the relation whose estimated row size is desired.
1978    
1979    =item RETURN
1980    
1981    Returns an estimate of the row size for the specified relation.
1982    
1983    =back
1984    
1985    =cut
1986    #: Return Type $;
1987    sub EstimateRowSize {
1988        # Get the parameters.
1989        my ($self, $relName) = @_;
1990        # Declare the return variable.
1991        my $retVal = 0;
1992        # Find the relation descriptor.
1993        my $relation = $self->_FindRelation($relName);
1994        # Get the list of fields.
1995        for my $fieldData (@{$relation->{Fields}}) {
1996            # Get the field type and add its length.
1997            my $fieldLen = $TypeTable{$fieldData->{type}}->{avgLen};
1998            $retVal += $fieldLen;
1999        }
2000        # Return the result.
2001        return $retVal;
2002    }
2003    
2004    =head3 GetFieldTable
2005    
2006    C<< my $fieldHash = $self->GetFieldTable($objectnName); >>
2007    
2008    Get the field structure for a specified entity or relationship.
2009    
2010    =over 4
2011    
2012    =item objectName
2013    
2014    Name of the desired entity or relationship.
2015    
2016    =item RETURN
2017    
2018    The table containing the field descriptors for the specified object.
2019    
2020    =back
2021    
2022    =cut
2023    
2024    sub GetFieldTable {
2025        # Get the parameters.
2026        my ($self, $objectName) = @_;
2027        # Get the descriptor from the metadata.
2028        my $objectData = $self->_GetStructure($objectName);
2029        # Return the object's field table.
2030        return $objectData->{Fields};
2031    }
2032    
2033    =head3 GetUsefulCrossValues
2034    
2035    C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >>
2036    
2037    Return a list of the useful attributes that would be returned by a B<Cross> call
2038    from an entity of the source entity type through the specified relationship. This
2039    means it will return the fields of the target entity type and the intersection data
2040    fields in the relationship. Only primary table fields are returned. In other words,
2041    the field names returned will be for fields where there is always one and only one
2042    value.
2043    
2044    =over 4
2045    
2046    =item sourceEntity
2047    
2048    Name of the entity from which the relationship crossing will start.
2049    
2050    =item relationship
2051    
2052    Name of the relationship being crossed.
2053    
2054    =item RETURN
2055    
2056    Returns a list of field names in Sprout field format (I<objectName>C<(>I<fieldName>C<)>.
2057    
2058    =back
2059    
2060    =cut
2061    #: Return Type @;
2062    sub GetUsefulCrossValues {
2063        # Get the parameters.
2064        my ($self, $sourceEntity, $relationship) = @_;
2065        # Declare the return variable.
2066        my @retVal = ();
2067        # Determine the target entity for the relationship. This is whichever entity is not
2068        # the source entity. So, if the source entity is the FROM, we'll get the name of
2069        # the TO, and vice versa.
2070        my $relStructure = $self->_GetStructure($relationship);
2071        my $targetEntityType = ($relStructure->{from} eq $sourceEntity ? "to" : "from");
2072        my $targetEntity = $relStructure->{$targetEntityType};
2073        # Get the field table for the entity.
2074        my $entityFields = $self->GetFieldTable($targetEntity);
2075        # The field table is a hash. The hash key is the field name. The hash value is a structure.
2076        # For the entity fields, the key aspect of the target structure is that the {relation} value
2077        # must match the entity name.
2078        my @fieldList = map { "$targetEntity($_)" } grep { $entityFields->{$_}->{relation} eq $targetEntity }
2079                            keys %{$entityFields};
2080        # Push the fields found onto the return variable.
2081        push @retVal, sort @fieldList;
2082        # Get the field table for the relationship.
2083        my $relationshipFields = $self->GetFieldTable($relationship);
2084        # Here we have a different rule. We want all the fields other than "from-link" and "to-link".
2085        # This may end up being an empty set.
2086        my @fieldList2 = map { "$relationship($_)" } grep { $_ ne "from-link" && $_ ne "to-link" }
2087                            keys %{$relationshipFields};
2088        # Push these onto the return list.
2089        push @retVal, sort @fieldList2;
2090        # Return the result.
2091        return @retVal;
2092    }
2093    
2094  =head2 Internal Utility Methods  =head2 Internal Utility Methods
2095    
2096  =head3 GetLoadStats  =head3 GetLoadStats
# Line 1290  Line 2102 
2102  =cut  =cut
2103    
2104  sub _GetLoadStats {  sub _GetLoadStats {
2105          return Stats->new('records');      return Stats->new();
2106  }  }
2107    
2108  =head3 GenerateFields  =head3 GenerateFields
# Line 1485  Line 2297 
2297          return $objectData->{Relations};          return $objectData->{Relations};
2298  }  }
2299    
 =head3 GetFieldTable  
   
 Get the field structure for a specified entity or relationship.  
   
 This is an instance method.  
   
 =over 4  
   
 =item objectName  
   
 Name of the desired entity or relationship.  
   
 =item RETURN  
   
 The table containing the field descriptors for the specified object.  
   
 =back  
   
 =cut  
   
 sub _GetFieldTable {  
         # Get the parameters.  
         my ($self, $objectName) = @_;  
         # Get the descriptor from the metadata.  
         my $objectData = $self->_GetStructure($objectName);  
         # Return the object's field table.  
         return $objectData->{Fields};  
 }  
   
2300  =head3 ValidateFieldNames  =head3 ValidateFieldNames
2301    
2302  Determine whether or not the field names are valid. A description of the problems with the names  Determine whether or not the field names are valid. A description of the problems with the names
# Line 1654  Line 2437 
2437  sub _LoadMetaData {  sub _LoadMetaData {
2438          # Get the parameters.          # Get the parameters.
2439          my ($filename) = @_;          my ($filename) = @_;
2440        Trace("Reading Sprout DBD from $filename.") if T(2);
2441          # Slurp the XML file into a variable. Extensive use of options is used to insure we          # Slurp the XML file into a variable. Extensive use of options is used to insure we
2442          # get the exact structure we want.          # get the exact structure we want.
2443          my $metadata = XML::Simple::XMLin($filename,          my $metadata = XML::Simple::XMLin($filename,
# Line 1681  Line 2465 
2465          for my $entityName (keys %{$entityList}) {          for my $entityName (keys %{$entityList}) {
2466                  my $entityStructure = $entityList->{$entityName};                  my $entityStructure = $entityList->{$entityName};
2467                  #                  #
2468                  # The first step is to run creating all the entity's default values. For C<Field> elements,          # The first step is to create all the entity's default values. For C<Field> elements,
2469                  # the relation name must be added where it is not specified. For relationships,                  # the relation name must be added where it is not specified. For relationships,
2470                  # the B<from-link> and B<to-link> fields must be inserted, and for entities an B<id>                  # the B<from-link> and B<to-link> fields must be inserted, and for entities an B<id>
2471                  # field must be added to each relation. Finally, each field will have a C<PrettySort> attribute                  # field must be added to each relation. Finally, each field will have a C<PrettySort> attribute
# Line 1860  Line 2644 
2644                  my @fromList = ();                  my @fromList = ();
2645                  my @toList = ();                  my @toList = ();
2646                  my @bothList = ();                  my @bothList = ();
2647                  Trace("Join table build for $entityName.") if T(3);          Trace("Join table build for $entityName.") if T(metadata => 4);
2648                  for my $relationshipName (keys %{$relationshipList}) {                  for my $relationshipName (keys %{$relationshipList}) {
2649                          my $relationship = $relationshipList->{$relationshipName};                          my $relationship = $relationshipList->{$relationshipName};
2650                          # Determine if this relationship has our entity in one of its link fields.                          # Determine if this relationship has our entity in one of its link fields.
2651                          my $fromEntity = $relationship->{from};                          my $fromEntity = $relationship->{from};
2652                          my $toEntity = $relationship->{to};                          my $toEntity = $relationship->{to};
2653                          Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(3);              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(Joins => 4);
2654                          if ($fromEntity eq $entityName) {                          if ($fromEntity eq $entityName) {
2655                                  if ($toEntity eq $entityName) {                                  if ($toEntity eq $entityName) {
2656                                          # Here the relationship is recursive.                                          # Here the relationship is recursive.
2657                                          push @bothList, $relationshipName;                                          push @bothList, $relationshipName;
2658                                          Trace("Relationship $relationshipName put in both-list.") if T(3);                      Trace("Relationship $relationshipName put in both-list.") if T(metadata => 4);
2659                                  } else {                                  } else {
2660                                          # Here the relationship comes from the entity.                                          # Here the relationship comes from the entity.
2661                                          push @fromList, $relationshipName;                                          push @fromList, $relationshipName;
2662                                          Trace("Relationship $relationshipName put in from-list.") if T(3);                      Trace("Relationship $relationshipName put in from-list.") if T(metadata => 4);
2663                                  }                                  }
2664                          } elsif ($toEntity eq $entityName) {                          } elsif ($toEntity eq $entityName) {
2665                                  # Here the relationship goes to the entity.                                  # Here the relationship goes to the entity.
2666                                  push @toList, $relationshipName;                                  push @toList, $relationshipName;
2667                                  Trace("Relationship $relationshipName put in to-list.") if T(3);                  Trace("Relationship $relationshipName put in to-list.") if T(metadata => 4);
2668                          }                          }
2669                  }                  }
2670                  # Create the nonrecursive joins. Note that we build two hashes for running                  # Create the nonrecursive joins. Note that we build two hashes for running
# Line 1896  Line 2680 
2680                                  # Create joins between the entity and this relationship.                                  # Create joins between the entity and this relationship.
2681                                  my $linkField = "$relationshipName.${linkType}_link";                                  my $linkField = "$relationshipName.${linkType}_link";
2682                                  my $joinClause = "$entityName.id = $linkField";                                  my $joinClause = "$entityName.id = $linkField";
2683                                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(4);                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(metadata => 4);
2684                                  $joinTable{"$entityName/$relationshipName"} = $joinClause;                                  $joinTable{"$entityName/$relationshipName"} = $joinClause;
2685                                  $joinTable{"$relationshipName/$entityName"} = $joinClause;                                  $joinTable{"$relationshipName/$entityName"} = $joinClause;
2686                                  # Create joins between this relationship and the other relationships.                                  # Create joins between this relationship and the other relationships.
# Line 1917  Line 2701 
2701                                                          # relationship and itself are prohibited.                                                          # relationship and itself are prohibited.
2702                                                          my $relJoinClause = "$otherName.${otherType}_link = $linkField";                                                          my $relJoinClause = "$otherName.${otherType}_link = $linkField";
2703                                                          $joinTable{$joinKey} = $relJoinClause;                                                          $joinTable{$joinKey} = $relJoinClause;
2704                                                          Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(4);                              Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(metadata => 4);
2705                                                  }                                                  }
2706                                          }                                          }
2707                                  }                                  }
# Line 1926  Line 2710 
2710                                  # relationship can only be ambiguous with another recursive relationship,                                  # relationship can only be ambiguous with another recursive relationship,
2711                                  # and the incoming relationship from the outer loop is never recursive.                                  # and the incoming relationship from the outer loop is never recursive.
2712                                  for my $otherName (@bothList) {                                  for my $otherName (@bothList) {
2713                                          Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(3);                      Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(metadata => 4);
2714                                          # Join from the left.                                          # Join from the left.
2715                                          $joinTable{"$relationshipName/$otherName"} =                                          $joinTable{"$relationshipName/$otherName"} =
2716                                                  "$linkField = $otherName.from_link";                                                  "$linkField = $otherName.from_link";
# Line 1941  Line 2725 
2725                  # rise to situations where we can't create the path we want; however, it is always                  # rise to situations where we can't create the path we want; however, it is always
2726                  # possible to get the same effect using multiple queries.                  # possible to get the same effect using multiple queries.
2727                  for my $relationshipName (@bothList) {                  for my $relationshipName (@bothList) {
2728                          Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(3);              Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(metadata => 4);
2729                          # Join to the entity from each direction.                          # Join to the entity from each direction.
2730                          $joinTable{"$entityName/$relationshipName"} =                          $joinTable{"$entityName/$relationshipName"} =
2731                                  "$entityName.id = $relationshipName.from_link";                                  "$entityName.id = $relationshipName.from_link";
# Line 1955  Line 2739 
2739          return $metadata;          return $metadata;
2740  }  }
2741    
2742    =head3 SortNeeded
2743    
2744    C<< my $flag = $erdb->SortNeeded($relationName); >>
2745    
2746    Return TRUE if the specified relation should be sorted during loading to remove duplicate keys,
2747    else FALSE.
2748    
2749    =over 4
2750    
2751    =item relationName
2752    
2753    Name of the relation to be examined.
2754    
2755    =item RETURN
2756    
2757    Returns TRUE if the relation needs a sort, else FALSE.
2758    
2759    =back
2760    
2761    =cut
2762    #: Return Type $;
2763    sub SortNeeded {
2764        # Get the parameters.
2765        my ($self, $relationName) = @_;
2766        # Declare the return variable.
2767        my $retVal = 0;
2768        # Find out if the relation is a primary entity relation.
2769        my $entityTable = $self->{Entities};
2770        if (exists $entityTable->{$relationName}) {
2771            my $keyType = $entityTable->{$relationName}->{keyType};
2772            # If the key is not a hash string, we must do the sort.
2773            if ($keyType ne 'hash-string') {
2774                $retVal = 1;
2775            }
2776        }
2777        # Return the result.
2778        return $retVal;
2779    }
2780    
2781  =head3 CreateRelationshipIndex  =head3 CreateRelationshipIndex
2782    
2783  Create an index for a relationship's relation.  Create an index for a relationship's relation.
# Line 1992  Line 2815 
2815          # index descriptor does not exist, it will be created automatically so we can add          # index descriptor does not exist, it will be created automatically so we can add
2816          # the field to it.          # the field to it.
2817          unshift @{$newIndex->{IndexFields}}, $firstField;          unshift @{$newIndex->{IndexFields}}, $firstField;
2818        # If this is a one-to-many relationship, the "To" index is unique.
2819        if ($relationshipStructure->{arity} eq "1M" && $indexKey eq "To") {
2820            $newIndex->{Unique} = 'true';
2821        }
2822          # Add the index to the relation.          # Add the index to the relation.
2823          _AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex);          _AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex);
2824  }  }
# Line 2083  Line 2910 
2910                  # Here we have a field list. Loop through its fields.                  # Here we have a field list. Loop through its fields.
2911                  my $fieldStructures = $structure->{Fields};                  my $fieldStructures = $structure->{Fields};
2912                  for my $fieldName (keys %{$fieldStructures}) {                  for my $fieldName (keys %{$fieldStructures}) {
2913                Trace("Processing field $fieldName of $defaultRelationName.") if T(4);
2914                          my $fieldData = $fieldStructures->{$fieldName};                          my $fieldData = $fieldStructures->{$fieldName};
2915                          # Get the field type.                          # Get the field type.
2916                          my $type = $fieldData->{type};                          my $type = $fieldData->{type};

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.42

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3