[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.6, Wed May 4 03:24:43 2005 UTC revision 1.39, Sun Mar 26 17:24:55 2006 UTC
# Line 2  Line 2 
2    
3          use strict;          use strict;
4          use Tracer;          use Tracer;
5          use DBKernel;      use DBrtns;
6          use Data::Dumper;          use Data::Dumper;
7          use XML::Simple;          use XML::Simple;
8          use DBQuery;          use DBQuery;
9          use DBObject;          use DBObject;
10          use Stats;          use Stats;
11          use Time::HiRes qw(gettimeofday);          use Time::HiRes qw(gettimeofday);
12        use FIG;
13    
14  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
15    
# Line 32  Line 33 
33  relation that contains two fields-- the feature ID (C<id>) and the alias name (C<alias>).  relation that contains two fields-- the feature ID (C<id>) and the alias name (C<alias>).
34  The B<FEATURE> entity also contains an optional virulence number. This is implemented  The B<FEATURE> entity also contains an optional virulence number. This is implemented
35  as a separate relation C<FeatureVirulence> which contains an ID (C<id>) and a virulence number  as a separate relation C<FeatureVirulence> which contains an ID (C<id>) and a virulence number
36  (C<virulence>). If the virulence of a feature I<ABC> is known to be 6, there will be one row in the  (C<virulence>). If the virulence of a feature I<ABC> is known to be 6, there will be one row in
37  C<FeatureVirulence> relation possessing the value I<ABC> as its ID and 6 as its virulence number.  the C<FeatureVirulence> relation possessing the value I<ABC> as its ID and 6 as its virulence
38  If the virulence of I<ABC> is not known, there will not be any rows for it in C<FeatureVirulence>.  number. If the virulence of I<ABC> is not known, there will not be any rows for it in
39    C<FeatureVirulence>.
40    
41  Entities are connected by binary relationships implemented using single relations possessing the  Entities are connected by binary relationships implemented using single relations possessing the
42  same name as the relationship itself and that has an I<arity> of 1-to-1 (C<11>), 1-to-many (C<1M>),  same name as the relationship itself and that has an I<arity> of 1-to-1 (C<11>), 1-to-many (C<1M>),
# Line 69  Line 71 
71  is described in the L</GenerateEntity> and L</GenerateConnection> methods, though it is not yet  is described in the L</GenerateEntity> and L</GenerateConnection> methods, though it is not yet
72  fully implemented.  fully implemented.
73    
74    =head2 XML Database Description
75    
76    =head3 Data Types
77    
78    The ERDB system supports the following data types. Note that there are numerous string
79    types depending on the maximum length. Some database packages limit the total number of
80    characters you have in an index key; to insure the database works in all environments,
81    the type of string should be the shortest one possible that supports all the known values.
82    
83    =over 4
84    
85    =item char
86    
87    single ASCII character
88    
89    =item int
90    
91    32-bit signed integer
92    
93    =item date
94    
95    64-bit unsigned integer, representing a PERL date/time value
96    
97    =item text
98    
99    long string; Text fields cannot be used in indexes or sorting and do not support the
100    normal syntax of filter clauses, but can be up to a billion character in length
101    
102    =item float
103    
104    double-precision floating-point number
105    
106    =item boolean
107    
108    single-bit numeric value; The value is stored as a 16-bit signed integer (for
109    compatability with certain database packages), but the only values supported are
110    0 and 1.
111    
112    =item key-string
113    
114    variable-length string, maximum 40 characters
115    
116    =item name-string
117    
118    variable-length string, maximum 80 characters
119    
120    =item medium-string
121    
122    variable-length string, maximum 160 characters
123    
124    =item string
125    
126    variable-length string, maximum 255 characters
127    
128    =back
129    
130    =head3 Global Tags
131    
132    The entire database definition must be inside a B<Database> tag. The display name of
133    the database is given by the text associated with the B<Title> tag. The display name
134    is only used in the automated documentation. It has no other effect. The entities and
135    relationships are listed inside the B<Entities> and B<Relationships> tags,
136    respectively. None of these tags have attributes.
137    
138        <Database>
139            <Title>... display title here...</Title>
140            <Entities>
141                ... entity definitions here ...
142            </Entities>
143            <Relationships>
144                ... relationship definitions here...
145            </Relationships>
146        </Database>
147    
148    Entities, relationships, indexes, and fields all allow a text tag called B<Notes>.
149    The text inside the B<Notes> tag contains comments that will appear when the database
150    documentation is generated. Within a B<Notes> tag, you may use C<[i]> and C<[/i]> for
151    italics, C<[b]> and C<[/b]> for bold, and C<[p]> for a new paragraph.
152    
153    =head3 Fields
154    
155    Both entities and relationships have fields described by B<Field> tags. A B<Field>
156    tag can have B<Notes> associated with it. The complete set of B<Field> tags for an
157    object mus be inside B<Fields> tags.
158    
159        <Entity ... >
160            <Fields>
161                ... Field tags ...
162            </Fields>
163        </Entity>
164    
165    The attributes for the B<Field> tag are as follows.
166    
167    =over 4
168    
169    =item name
170    
171    Name of the field. The field name should contain only letters, digits, and hyphens (C<->),
172    and the first character should be a letter. Most underlying databases are case-insensitive
173    with the respect to field names, so a best practice is to use lower-case letters only.
174    
175    =item type
176    
177    Data type of the field. The legal data types are given above.
178    
179    =item relation
180    
181    Name of the relation containing the field. This should only be specified for entity
182    fields. The ERDB system does not support optional fields or multi-occurring fields
183    in the primary relation of an entity. Instead, they are put into secondary relations.
184    So, for example, in the C<Genome> entity, the C<group-name> field indicates a special
185    grouping used to select a subset of the genomes. A given genome may not be in any
186    groups or may be in multiple groups. Therefore, C<group-name> specifies a relation
187    value. The relation name specified must be a valid table name. By convention, it is
188    usually the entity name followed by a qualifying word (e.g. C<GenomeGroup>). In an
189    entity, the fields without a relation attribute are said to belong to the
190    I<primary relation>. This relation has the same name as the entity itself.
191    
192    =back
193    
194    =head3 Indexes
195    
196    An entity can have multiple alternate indexes associated with it. The fields must
197    be from the primary relation. The alternate indexes assist in ordering results
198    from a query. A relationship can have up to two indexes-- a I<to-index> and a
199    I<from-index>. These order the results when crossing the relationship. For
200    example, in the relationship C<HasContig> from C<Genome> to C<Contig>, the
201    from-index would order the contigs of a ganome, and the to-index would order
202    the genomes of a contig. A relationship's index must specify only fields in
203    the relationship.
204    
205    The indexes for an entity must be listed inside the B<Indexes> tag. The from-index
206    of a relationship is specified using the B<FromIndex> tag; the to-index is specified
207    using the B<ToIndex> tag.
208    
209    Each index can contain a B<Notes> tag. In addition, it will have an B<IndexFields>
210    tag containing the B<IndexField> tags. These specify, in order, the fields used in
211    the index. The attributes of an B<IndexField> tag are as follows.
212    
213    =over 4
214    
215    =item name
216    
217    Name of the field.
218    
219    =item order
220    
221    Sort order of the field-- C<ascending> or C<descending>.
222    
223    =back
224    
225    The B<Index>, B<FromIndex>, and B<ToIndex> tags themselves have no attributes.
226    
227    =head3 Object and Field Names
228    
229    By convention entity and relationship names use capital casing (e.g. C<Genome> or
230    C<HasRegionsIn>. Most underlying databases, however, are aggressively case-insensitive
231    with respect to relation names, converting them internally to all-upper case or
232    all-lower case.
233    
234    If syntax or parsing errors occur when you try to load or use an ERDB database, the
235    most likely reason is that one of your objects has an SQL reserved word as its name.
236    The list of SQL reserved words keeps increasing; however, most are unlikely to show
237    up as a noun or declarative verb phrase. The exceptions are C<Group>, C<User>,
238    C<Table>, C<Index>, C<Object>, C<Date>, C<Number>, C<Update>, C<Time>, C<Percent>,
239    C<Memo>, C<Order>, and C<Sum>. This problem can crop up in field names as well.
240    
241    Every entity has a field called C<id> that acts as its primary key. Every relationship
242    has fields called C<from-link> and C<to-link> that contain copies of the relevant
243    entity IDs. These are essentially ERDB's reserved words, and should not be used
244    for user-defined field names.
245    
246    =head3 Entities
247    
248    An entity is described by the B<Entity> tag. The entity can contain B<Notes>, an
249    B<Indexes> tag containing one or more secondary indexes, and a B<Fields> tag
250    containing one or more fields. The attributes of the B<Entity> tag are as follows.
251    
252    =over 4
253    
254    =item name
255    
256    Name of the entity. The entity name, by convention, uses capital casing (e.g. C<Genome>
257    or C<GroupBlock>) and should be a noun or noun phrase.
258    
259    =item keyType
260    
261    Data type of the primary key. The primary key is always named C<id>.
262    
263    =back
264    
265    =head3 Relationships
266    
267    A relationship is described by the C<Relationship> tag. Within a relationship,
268    there can be a C<Notes> tag, a C<Fields> tag containing the intersection data
269    fields, a C<FromIndex> tag containing the from-index, and a C<ToIndex> tag containing
270    the to-index.
271    
272    The C<Relationship> tag has the following attributes.
273    
274    =over 4
275    
276    =item name
277    
278    Name of the relationship. The relationship name, by convention, uses capital casing
279    (e.g. C<ContainsRegionIn> or C<HasContig>), and should be a declarative verb
280    phrase, designed to fit between the from-entity and the to-entity (e.g.
281    Block C<ContainsRegionIn> Genome).
282    
283    =item from
284    
285    Name of the entity from which the relationship starts.
286    
287    =item to
288    
289    Name of the entity to which the relationship proceeds.
290    
291    =item arity
292    
293    Relationship type: C<1M> for one-to-many and C<MM> for many-to-many.
294    
295    =back
296    
297  =cut  =cut
298    
299  # GLOBALS  # GLOBALS
# Line 76  Line 301 
301  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.
302  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field
303  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation
304   #string is specified in the field definition.  # string is specified in the field definition. "avgLen" is the average byte length for estimating
305  my %TypeTable = ( char =>        { sqlType => 'CHAR(1)',                        maxLen => 1,                    dataGen => "StringGen('A')" },  # record sizes.
306                                    int =>         { sqlType => 'INTEGER',                        maxLen => 20,                   dataGen => "IntGen(0, 99999999)" },  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            avgLen =>   1, dataGen => "StringGen('A')" },
307                                    string =>  { sqlType => 'VARCHAR(255)',               maxLen => 255,                  dataGen => "StringGen(IntGen(10,250))" },                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           avgLen =>   4, dataGen => "IntGen(0, 99999999)" },
308                                    text =>        { sqlType => 'TEXT',                           maxLen => 1000000000,   dataGen => "StringGen(IntGen(80,1000))" },                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          avgLen => 100, dataGen => "StringGen(IntGen(10,250))" },
309                                    date =>        { sqlType => 'BIGINT',                         maxLen => 80,                   dataGen => "DateGen(-7, 7, IntGen(0,1400))" },                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" },
310                                    float =>       { sqlType => 'DOUBLE PRECISION',       maxLen => 40,                   dataGen => "FloatGen(0.0, 100.0)" },                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" },
311                                    boolean => { sqlType => 'SMALLINT',                   maxLen => 1,                    dataGen => "IntGen(0, 1)" },                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, dataGen => "FloatGen(0.0, 100.0)" },
312                      boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   1, dataGen => "IntGen(0, 1)" },
313                               'key-string' =>                               'key-string' =>
314                                                           { sqlType => 'VARCHAR(40)',            maxLen => 40,                   dataGen => "StringGen(IntGen(10,40))" },                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, dataGen => "StringGen(IntGen(10,40))" },
315                                   'name-string' =>                                   'name-string' =>
316                                                           { sqlType => 'VARCHAR(80)',            maxLen => 80,                   dataGen => "StringGen(IntGen(10,80))" },                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           avgLen =>  40, dataGen => "StringGen(IntGen(10,80))" },
317                                   'medium-string' =>                                   'medium-string' =>
318                                                           { sqlType => 'VARCHAR(160)',           maxLen => 160,                  dataGen => "StringGen(IntGen(10,160))" },                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          avgLen =>  40, dataGen => "StringGen(IntGen(10,160))" },
319                                  );                                  );
320    
321  # Table translating arities into natural language.  # Table translating arities into natural language.
# Line 145  Line 371 
371    
372  =head3 ShowMetaData  =head3 ShowMetaData
373    
374  C<< $database->ShowMetaData($fileName); >>  C<< $erdb->ShowMetaData($fileName); >>
375    
376  This method outputs a description of the database. This description can be used to help users create  This method outputs a description of the database. This description can be used to help users create
377  the data to be loaded into the relations.  the data to be loaded into the relations.
# Line 282  Line 508 
508                  # Separate out the source, the target, and the join clause.                  # Separate out the source, the target, and the join clause.
509                  $joinKey =~ m!^([^/]+)/(.+)$!;                  $joinKey =~ m!^([^/]+)/(.+)$!;
510                  my ($sourceRelation, $targetRelation) = ($1, $2);                  my ($sourceRelation, $targetRelation) = ($1, $2);
511                  Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(4);          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(Joins => 4);
512                  my $source = $self->ComputeObjectSentence($sourceRelation);                  my $source = $self->ComputeObjectSentence($sourceRelation);
513                  my $target = $self->ComputeObjectSentence($targetRelation);                  my $target = $self->ComputeObjectSentence($targetRelation);
514                  my $clause = $joinTable->{$joinKey};                  my $clause = $joinTable->{$joinKey};
# Line 300  Line 526 
526    
527  =head3 DumpMetaData  =head3 DumpMetaData
528    
529  C<< $database->DumpMetaData(); >>  C<< $erdb->DumpMetaData(); >>
530    
531  Return a dump of the metadata structure.  Return a dump of the metadata structure.
532    
# Line 315  Line 541 
541    
542  =head3 CreateTables  =head3 CreateTables
543    
544  C<< $datanase->CreateTables(); >>  C<< $erdb->CreateTables(); >>
545    
546  This method creates the tables for the database from the metadata structure loaded by the  This method creates the tables for the database from the metadata structure loaded by the
547  constructor. It is expected this function will only be used on rare occasions, when the  constructor. It is expected this function will only be used on rare occasions, when the
# Line 327  Line 553 
553  sub CreateTables {  sub CreateTables {
554          # Get the parameters.          # Get the parameters.
555          my ($self) = @_;          my ($self) = @_;
556          my $metadata = $self->{_metaData};      # Get the relation names.
557          my $dbh = $self->{_dbh};      my @relNames = $self->GetTableNames();
558          # Loop through the entities.      # Loop through the relations.
559          my $entityHash = $metadata->{Entities};      for my $relationName (@relNames) {
         for my $entityName (keys %{$entityHash}) {  
                 my $entityData = $entityHash->{$entityName};  
                 # Tell the user what we're doing.  
                 Trace("Creating relations for entity $entityName.") if T(1);  
                 # Loop through the entity's relations.  
                 for my $relationName (keys %{$entityData->{Relations}}) {  
560                          # Create a table for this relation.                          # Create a table for this relation.
561                          $self->CreateTable($relationName);                          $self->CreateTable($relationName);
562                          Trace("Relation $relationName created.") if T(1);          Trace("Relation $relationName created.") if T(2);
                 }  
         }  
         # Loop through the relationships.  
         my $relationshipTable = $metadata->{Relationships};  
         for my $relationshipName (keys %{$metadata->{Relationships}}) {  
                 # Create a table for this relationship.  
                 Trace("Creating relationship $relationshipName.") if T(1);  
                 $self->CreateTable($relationshipName);  
563          }          }
564  }  }
565    
566  =head3 CreateTable  =head3 CreateTable
567    
568  C<< $database->CreateTable($tableName, $indexFlag); >>  C<< $erdb->CreateTable($tableName, $indexFlag, $estimatedRows); >>
569    
570  Create the table for a relation and optionally create its indexes.  Create the table for a relation and optionally create its indexes.
571    
# Line 363  Line 575 
575    
576  Name of the relation (which will also be the table name).  Name of the relation (which will also be the table name).
577    
578  =item $indexFlag  =item indexFlag
579    
580  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,
581  L</CreateIndexes> must be called later to bring the indexes into existence.  L</CreateIndexes> must be called later to bring the indexes into existence.
582    
583    =item estimatedRows (optional)
584    
585    If specified, the estimated maximum number of rows for the relation. This
586    information allows the creation of tables using storage engines that are
587    faster but require size estimates, such as MyISAM.
588    
589  =back  =back
590    
591  =cut  =cut
592    
593  sub CreateTable {  sub CreateTable {
594          # Get the parameters.          # Get the parameters.
595          my ($self, $relationName, $indexFlag) = @_;      my ($self, $relationName, $indexFlag, $estimatedRows) = @_;
596          # Get the database handle.          # Get the database handle.
597          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
598          # Get the relation data and determine whether or not the relation is primary.          # Get the relation data and determine whether or not the relation is primary.
# Line 398  Line 616 
616          # Insure the table is not already there.          # Insure the table is not already there.
617          $dbh->drop_table(tbl => $relationName);          $dbh->drop_table(tbl => $relationName);
618          Trace("Table $relationName dropped.") if T(2);          Trace("Table $relationName dropped.") if T(2);
619        # If there are estimated rows, create an estimate so we can take advantage of
620        # faster DB technologies.
621        my $estimation = undef;
622        if ($estimatedRows) {
623            $estimation = [$self->EstimateRowSize($relationName), $estimatedRows];
624        }
625          # Create the table.          # Create the table.
626          Trace("Creating table $relationName: $fieldThing") if T(2);          Trace("Creating table $relationName: $fieldThing") if T(2);
627          $dbh->create_table(tbl => $relationName, flds => $fieldThing);      $dbh->create_table(tbl => $relationName, flds => $fieldThing, estimates => $estimation);
628          Trace("Relation $relationName created in database.") if T(2);          Trace("Relation $relationName created in database.") if T(2);
629          # If we want to build the indexes, we do it here.          # If we want to build the indexes, we do it here.
630          if ($indexFlag) {          if ($indexFlag) {
# Line 408  Line 632 
632          }          }
633  }  }
634    
635    =head3 VerifyFields
636    
637    C<< my $count = $erdb->VerifyFields($relName, \@fieldList); >>
638    
639    Run through the list of proposed field values, insuring that all the character fields are
640    below the maximum length. If any fields are too long, they will be truncated in place.
641    
642    =over 4
643    
644    =item relName
645    
646    Name of the relation for which the specified fields are destined.
647    
648    =item fieldList
649    
650    Reference to a list, in order, of the fields to be put into the relation.
651    
652    =item RETURN
653    
654    Returns the number of fields truncated.
655    
656    =back
657    
658    =cut
659    
660    sub VerifyFields {
661        # Get the parameters.
662        my ($self, $relName, $fieldList) = @_;
663        # Initialize the return value.
664        my $retVal = 0;
665        # Get the relation definition.
666        my $relData = $self->_FindRelation($relName);
667        # Get the list of field descriptors.
668        my $fieldTypes = $relData->{Fields};
669        my $fieldCount = scalar @{$fieldTypes};
670        # Loop through the two lists.
671        for (my $i = 0; $i < $fieldCount; $i++) {
672            # Get the type of the current field.
673            my $fieldType = $fieldTypes->[$i]->{type};
674            # If it's a character field, verify the length.
675            if ($fieldType =~ /string/) {
676                my $maxLen = $TypeTable{$fieldType}->{maxLen};
677                my $oldString = $fieldList->[$i];
678                if (length($oldString) > $maxLen) {
679                    # Here it's too big, so we truncate it.
680                    Trace("Truncating field $i in relation $relName to $maxLen characters from \"$oldString\".") if T(1);
681                    $fieldList->[$i] = substr $oldString, 0, $maxLen;
682                    $retVal++;
683                }
684            }
685        }
686        # Return the truncation count.
687        return $retVal;
688    }
689    
690  =head3 CreateIndex  =head3 CreateIndex
691    
692  C<< $database->CreateIndex($relationName); >>  C<< $erdb->CreateIndex($relationName); >>
693    
694  Create the indexes for a relation. If a table is being loaded from a large source file (as  Create the indexes for a relation. If a table is being loaded from a large source file (as
695  is the case in L</LoadTable>), it is best to create the indexes after the load. If that is  is the case in L</LoadTable>), it is sometimes best to create the indexes after the load.
696  the case, then L</CreateTable> should be called with the index flag set to FALSE, and this  If that is the case, then L</CreateTable> should be called with the index flag set to
697  method used after the load to create the indexes for the table.  FALSE, and this method used after the load to create the indexes for the table.
698    
699  =cut  =cut
700    
# Line 436  Line 715 
715                  # Get the index's uniqueness flag.                  # Get the index's uniqueness flag.
716                  my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');                  my $unique = (exists $indexData->{Unique} ? $indexData->{Unique} : 'false');
717                  # Create the index.                  # Create the index.
718                  $dbh->create_index(idx => $indexName, tbl => $relationName, flds => $flds, unique => $unique);          my $rv = $dbh->create_index(idx => $indexName, tbl => $relationName,
719                                        flds => $flds, unique => $unique);
720            if ($rv) {
721                  Trace("Index created: $indexName for $relationName ($flds)") if T(1);                  Trace("Index created: $indexName for $relationName ($flds)") if T(1);
722            } else {
723                Confess("Error creating index $indexName for $relationName using ($flds): " . $dbh->error_message());
724            }
725          }          }
726  }  }
727    
728  =head3 LoadTables  =head3 LoadTables
729    
730  C<< my $stats = $database->LoadTables($directoryName, $rebuild); >>  C<< my $stats = $erdb->LoadTables($directoryName, $rebuild); >>
731    
732  This method will load the database tables from a directory. The tables must already have been created  This method will load the database tables from a directory. The tables must already have been created
733  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;
# Line 486  Line 770 
770          $directoryName =~ s!/\\$!!;          $directoryName =~ s!/\\$!!;
771          # Declare the return variable.          # Declare the return variable.
772          my $retVal = Stats->new();          my $retVal = Stats->new();
773          # Get the metadata structure.      # Get the relation names.
774          my $metaData = $self->{_metaData};      my @relNames = $self->GetTableNames();
775          # Loop through the entities.      for my $relationName (@relNames) {
         for my $entity (values %{$metaData->{Entities}}) {  
                 # Loop through the entity's relations.  
                 for my $relationName (keys %{$entity->{Relations}}) {  
776                          # Try to load this relation.                          # Try to load this relation.
777                          my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);                          my $result = $self->_LoadRelation($directoryName, $relationName, $rebuild);
778                          # Accumulate the statistics.                          # Accumulate the statistics.
779                          $retVal->Accumulate($result);                          $retVal->Accumulate($result);
780                  }                  }
         }  
         # Loop through the relationships.  
         for my $relationshipName (keys %{$metaData->{Relationships}}) {  
                 # Try to load this relationship's relation.  
                 my $result = $self->_LoadRelation($directoryName, $relationshipName, $rebuild);  
                 # Accumulate the statistics.  
                 $retVal->Accumulate($result);  
         }  
781          # Add the duration of the load to the statistical object.          # Add the duration of the load to the statistical object.
782          $retVal->Add('duration', gettimeofday - $startTime);          $retVal->Add('duration', gettimeofday - $startTime);
783          # Return the accumulated statistics.          # Return the accumulated statistics.
784          return $retVal;          return $retVal;
785  }  }
786    
787    
788  =head3 GetTableNames  =head3 GetTableNames
789    
790  C<< my @names = $database->GetTableNames; >>  C<< my @names = $erdb->GetTableNames; >>
791    
792  Return a list of the relations required to implement this database.  Return a list of the relations required to implement this database.
793    
# Line 530  Line 804 
804    
805  =head3 GetEntityTypes  =head3 GetEntityTypes
806    
807  C<< my @names = $database->GetEntityTypes; >>  C<< my @names = $erdb->GetEntityTypes; >>
808    
809  Return a list of the entity type names.  Return a list of the entity type names.
810    
# Line 545  Line 819 
819          return sort keys %{$entityList};          return sort keys %{$entityList};
820  }  }
821    
822    =head3 IsEntity
823    
824    C<< my $flag = $erdb->IsEntity($entityName); >>
825    
826    Return TRUE if the parameter is an entity name, else FALSE.
827    
828    =over 4
829    
830    =item entityName
831    
832    Object name to be tested.
833    
834    =item RETURN
835    
836    Returns TRUE if the specified string is an entity name, else FALSE.
837    
838    =back
839    
840    =cut
841    
842    sub IsEntity {
843        # Get the parameters.
844        my ($self, $entityName) = @_;
845        # Test to see if it's an entity.
846        return exists $self->{_metaData}->{Entities}->{$entityName};
847    }
848    
849  =head3 Get  =head3 Get
850    
851  C<< my $query = $database->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my $query = $erdb->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
852    
853  This method returns a query object for entities of a specified type using a specified filter.  This method returns a query object for entities of a specified type using a specified filter.
854  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each
# Line 555  Line 856 
856  following call requests all B<Genome> objects for the genus specified in the variable  following call requests all B<Genome> objects for the genus specified in the variable
857  $genus.  $genus.
858    
859  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", $genus); >>
860    
861  The WHERE clause contains a single question mark, so there is a single additional  The WHERE clause contains a single question mark, so there is a single additional
862  parameter representing the parameter value. It would also be possible to code  parameter representing the parameter value. It would also be possible to code
863    
864  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>
865    
866  however, this version of the call would generate a syntax error if there were any quote  however, this version of the call would generate a syntax error if there were any quote
867  characters inside the variable C<$genus>.  characters inside the variable C<$genus>.
# Line 572  Line 873 
873  It is possible to specify multiple entity and relationship names in order to retrieve more than  It is possible to specify multiple entity and relationship names in order to retrieve more than
874  one object's data at the same time, which allows highly complex joined queries. For example,  one object's data at the same time, which allows highly complex joined queries. For example,
875    
876  C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>
877    
878  If multiple names are specified, then the query processor will automatically determine a  If multiple names are specified, then the query processor will automatically determine a
879  join path between the entities and relationships. The algorithm used is very simplistic.  join path between the entities and relationships. The algorithm used is very simplistic.
880  In particular, you can't specify any entity or relationship more than once, and if a  In particular, if a relationship is recursive, the path is determined by the order in which
881  relationship is recursive, the path is determined by the order in which the entity  the entity and the relationship appear. For example, consider a recursive relationship
882  and the relationship appear. For example, consider a recursive relationship B<IsParentOf>  B<IsParentOf> which relates B<People> objects to other B<People> objects. If the join path is
 which relates B<People> objects to other B<People> objects. If the join path is  
883  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,  coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,
884  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.  the join path is C<['IsParentOf', 'People']>, then the people returned will be children.
885    
886    If an entity or relationship is mentioned twice, the name for the second occurrence will
887    be suffixed with C<2>, the third occurrence will be suffixed with C<3>, and so forth. So,
888    for example, if we have C<['Feature', 'HasContig', 'Contig', 'HasContig']>, then the
889    B<to-link> field of the first B<HasContig> is specified as C<HasContig(to-link)>, while
890    the B<to-link> field of the second B<HasContig> is specified as C<HasContig2(to-link)>.
891    
892  =over 4  =over 4
893    
894  =item objectNames  =item objectNames
# Line 605  Line 911 
911    
912  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>  C<< "Genome(genus) = ? ORDER BY Genome(species)" >>
913    
914    Note that the case is important. Only an uppercase "ORDER BY" with a single space will
915    be processed. The idea is to make it less likely to find the verb by accident.
916    
917  The rules for field references in a sort order are the same as those for field references in the  The rules for field references in a sort order are the same as those for field references in the
918  filter clause in general; however, odd things may happen if a sort field is from a secondary  filter clause in general; however, odd things may happen if a sort field is from a secondary
919  relation.  relation.
920    
921    Finally, you can limit the number of rows returned by adding a LIMIT clause. The LIMIT must
922    be the last thing in the filter clause, and it contains only the word "LIMIT" followed by
923    a positive number. So, for example
924    
925    C<< "Genome(genus) = ? ORDER BY Genome(species) LIMIT 10" >>
926    
927    will only return the first ten genomes for the specified genus. The ORDER BY clause is not
928    required. For example, to just get the first 10 genomes in the B<Genome> table, you could
929    use
930    
931    C<< "LIMIT 10" >>
932    
933  =item param1, param2, ..., paramN  =item param1, param2, ..., paramN
934    
935  Parameter values to be substituted into the filter clause.  Parameter values to be substituted into the filter clause.
# Line 624  Line 945 
945  sub Get {  sub Get {
946          # Get the parameters.          # Get the parameters.
947          my ($self, $objectNames, $filterClause, @params) = @_;          my ($self, $objectNames, $filterClause, @params) = @_;
948        # Adjust the list of object names to account for multiple occurrences of the
949        # same object. We start with a hash table keyed on object name that will
950        # return the object suffix. The first time an object is encountered it will
951        # not be found in the hash. The next time the hash will map the object name
952        # to 2, then 3, and so forth.
953        my %objectHash = ();
954        # This list will contain the object names as they are to appear in the
955        # FROM list.
956        my @fromList = ();
957        # This list contains the suffixed object name for each object. It is exactly
958        # parallel to the list in the $objectNames parameter.
959        my @mappedNameList = ();
960        # Finally, this hash translates from a mapped name to its original object name.
961        my %mappedNameHash = ();
962        # Now we create the lists. Note that for every single name we push something into
963        # @fromList and @mappedNameList. This insures that those two arrays are exactly
964        # parallel to $objectNames.
965        for my $objectName (@{$objectNames}) {
966            # Get the next suffix for this object.
967            my $suffix = $objectHash{$objectName};
968            if (! $suffix) {
969                # Here we are seeing the object for the first time. The object name
970                # is used as is.
971                push @mappedNameList, $objectName;
972                push @fromList, $objectName;
973                $mappedNameHash{$objectName} = $objectName;
974                # Denote the next suffix will be 2.
975                $objectHash{$objectName} = 2;
976            } else {
977                # Here we've seen the object before. We construct a new name using
978                # the suffix from the hash and update the hash.
979                my $mappedName = "$objectName$suffix";
980                $objectHash{$objectName} = $suffix + 1;
981                # The FROM list has the object name followed by the mapped name. This
982                # tells SQL it's still the same table, but we're using a different name
983                # for it to avoid confusion.
984                push @fromList, "$objectName $mappedName";
985                # The mapped-name list contains the real mapped name.
986                push @mappedNameList, $mappedName;
987                # Finally, enable us to get back from the mapped name to the object name.
988                $mappedNameHash{$mappedName} = $objectName;
989            }
990        }
991          # Construct the SELECT statement. The general pattern is          # Construct the SELECT statement. The general pattern is
992          #          #
993          # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN          # SELECT name1.*, name2.*, ... nameN.* FROM name1, name2, ... nameN
994          #          #
995          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
996          my $command = "SELECT DISTINCT " . join('.*, ', @{$objectNames}) . ".* FROM " .      my $command = "SELECT DISTINCT " . join('.*, ', @mappedNameList) . ".* FROM " .
997                                  join(', ', @{$objectNames});                  join(', ', @fromList);
998          # Check for a filter clause.          # Check for a filter clause.
999          if ($filterClause) {          if ($filterClause) {
1000                  # Here we have one, so we convert its field names and add it to the query. First,                  # Here we have one, so we convert its field names and add it to the query. First,
# Line 638  Line 1002 
1002                  my $filterString = $filterClause;                  my $filterString = $filterClause;
1003                  # Next, we sort the object names by length. This helps protect us from finding                  # Next, we sort the object names by length. This helps protect us from finding
1004                  # object names inside other object names when we're doing our search and replace.                  # object names inside other object names when we're doing our search and replace.
1005                  my @sortedNames = sort { length($b) - length($a) } @{$objectNames};          my @sortedNames = sort { length($b) - length($a) } @mappedNameList;
1006                  # We will also keep a list of conditions to add to the WHERE clause in order to link                  # We will also keep a list of conditions to add to the WHERE clause in order to link
1007                  # entities and relationships as well as primary relations to secondary ones.                  # entities and relationships as well as primary relations to secondary ones.
1008                  my @joinWhere = ();                  my @joinWhere = ();
1009                  # The final preparatory step is to create a hash table of relation names. The                  # The final preparatory step is to create a hash table of relation names. The
1010                  # table begins with the relation names already in the SELECT command.          # table begins with the relation names already in the SELECT command. We may
1011                  my %fromNames = ();          # need to add relations later if there is filtering on a field in a secondary
1012                  for my $objectName (@sortedNames) {          # relation. The secondary relations are the ones that contain multiply-
1013                          $fromNames{$objectName} = 1;          # occurring or optional fields.
1014                  }          my %fromNames = map { $_ => 1 } @sortedNames;
1015                  # We are ready to begin. We loop through the object names, replacing each                  # We are ready to begin. We loop through the object names, replacing each
1016                  # object name's field references by the corresponding SQL field reference.                  # object name's field references by the corresponding SQL field reference.
1017                  # Along the way, if we find a secondary relation, we will need to add it                  # Along the way, if we find a secondary relation, we will need to add it
1018                  # to the FROM clause.                  # to the FROM clause.
1019                  for my $objectName (@sortedNames) {          for my $mappedName (@sortedNames) {
1020                          # Get the length of the object name plus 2. This is the value we add to the                          # Get the length of the object name plus 2. This is the value we add to the
1021                          # size of the field name to determine the size of the field reference as a                          # size of the field name to determine the size of the field reference as a
1022                          # whole.                          # whole.
1023                          my $nameLength = 2 + length $objectName;              my $nameLength = 2 + length $mappedName;
1024                # Get the real object name for this mapped name.
1025                my $objectName = $mappedNameHash{$mappedName};
1026                          # Get the object's field list.                          # Get the object's field list.
1027                          my $fieldList = $self->_GetFieldTable($objectName);              my $fieldList = $self->GetFieldTable($objectName);
1028                          # Find the field references for this object.                          # Find the field references for this object.
1029                          while ($filterString =~ m/$objectName\(([^)]*)\)/g) {              while ($filterString =~ m/$mappedName\(([^)]*)\)/g) {
1030                                  # At this point, $1 contains the field name, and the current position                                  # At this point, $1 contains the field name, and the current position
1031                                  # is set immediately after the final parenthesis. We pull out the name of                                  # is set immediately after the final parenthesis. We pull out the name of
1032                                  # the field and the position and length of the field reference as a whole.                                  # the field and the position and length of the field reference as a whole.
# Line 673  Line 1039 
1039                                  } else {                                  } else {
1040                                          # Get the field's relation.                                          # Get the field's relation.
1041                                          my $relationName = $fieldList->{$fieldName}->{relation};                                          my $relationName = $fieldList->{$fieldName}->{relation};
1042                        # Now we have a secondary relation. We need to insure it matches the
1043                        # mapped name of the primary relation. First we peel off the suffix
1044                        # from the mapped name.
1045                        my $mappingSuffix = substr $mappedName, length($objectName);
1046                        # Put the mapping suffix onto the relation name to get the
1047                        # mapped relation name.
1048                        my $mappedRelationName = "$relationName$mappingSuffix";
1049                                          # Insure the relation is in the FROM clause.                                          # Insure the relation is in the FROM clause.
1050                                          if (!exists $fromNames{$relationName}) {                      if (!exists $fromNames{$mappedRelationName}) {
1051                                                  # Add the relation to the FROM clause.                                                  # Add the relation to the FROM clause.
1052                            if ($mappedRelationName eq $relationName) {
1053                                # The name is un-mapped, so we add it without
1054                                # any frills.
1055                                                  $command .= ", $relationName";                                                  $command .= ", $relationName";
                                                 # Create its join sub-clause.  
1056                                                  push @joinWhere, "$objectName.id = $relationName.id";                                                  push @joinWhere, "$objectName.id = $relationName.id";
1057                                                  # Denote we have it available for future fields.                          } else {
1058                                                  $fromNames{$relationName} = 1;                              # Here we have a mapping situation.
1059                                $command .= ", $relationName $mappedRelationName";
1060                                push @joinWhere, "$mappedRelationName.id = $mappedName.id";
1061                            }
1062                            # Denote we have this relation available for future fields.
1063                            $fromNames{$mappedRelationName} = 1;
1064                                          }                                          }
1065                                          # Form an SQL field reference from the relation name and the field name.                                          # Form an SQL field reference from the relation name and the field name.
1066                                          my $sqlReference = "$relationName." . _FixName($fieldName);                      my $sqlReference = "$mappedRelationName." . _FixName($fieldName);
1067                                          # Put it into the filter string in place of the old value.                                          # Put it into the filter string in place of the old value.
1068                                          substr($filterString, $pos, $len) = $sqlReference;                                          substr($filterString, $pos, $len) = $sqlReference;
1069                                          # Reposition the search.                                          # Reposition the search.
# Line 695  Line 1075 
1075                  # is more than one object in the object list. We start with the first object and                  # is more than one object in the object list. We start with the first object and
1076                  # run through the objects after it. Note also that we make a safety copy of the                  # run through the objects after it. Note also that we make a safety copy of the
1077                  # list before running through it.                  # list before running through it.
1078                  my @objectList = @{$objectNames};          my @mappedObjectList = @mappedNameList;
1079                  my $lastObject = shift @objectList;          my $lastMappedObject = shift @mappedObjectList;
1080                  # Get the join table.                  # Get the join table.
1081                  my $joinTable = $self->{_metaData}->{Joins};                  my $joinTable = $self->{_metaData}->{Joins};
1082                  # Loop through the object list.                  # Loop through the object list.
1083                  for my $thisObject (@objectList) {          for my $thisMappedObject (@mappedObjectList) {
1084                          # Look for a join.              # Look for a join using the real object names.
1085                my $lastObject = $mappedNameHash{$lastMappedObject};
1086                my $thisObject = $mappedNameHash{$thisMappedObject};
1087                          my $joinKey = "$lastObject/$thisObject";                          my $joinKey = "$lastObject/$thisObject";
1088                          if (!exists $joinTable->{$joinKey}) {                          if (!exists $joinTable->{$joinKey}) {
1089                                  # Here there's no join, so we throw an error.                                  # Here there's no join, so we throw an error.
1090                                  Confess("No join exists to connect from $lastObject to $thisObject.");                  Confess("No join exists to connect from $lastMappedObject to $thisMappedObject.");
1091                          } else {                          } else {
1092                                  # Get the join clause and add it to the WHERE list.                  # Get the join clause.
1093                                  push @joinWhere, $joinTable->{$joinKey};                  my $unMappedJoin = $joinTable->{$joinKey};
1094                    # Fix the names.
1095                    $unMappedJoin =~ s/$lastObject/$lastMappedObject/;
1096                    $unMappedJoin =~ s/$thisObject/$thisMappedObject/;
1097                    push @joinWhere, $unMappedJoin;
1098                                  # Save this object as the last object for the next iteration.                                  # Save this object as the last object for the next iteration.
1099                                  $lastObject = $thisObject;                  $lastMappedObject = $thisMappedObject;
1100                          }                          }
1101                  }                  }
1102                  # Now we need to handle the whole ORDER BY thing. We'll put the order by clause          # Now we need to handle the whole ORDER BY / LIMIT thing. The important part
1103                  # in the following variable.          # here is we want the filter clause to be empty if there's no WHERE filter.
1104            # We'll put the ORDER BY / LIMIT clauses in the following variable.
1105                  my $orderClause = "";                  my $orderClause = "";
1106                  # Locate the ORDER BY verb (if any).          # Locate the ORDER BY or LIMIT verbs (if any). We use a non-greedy
1107                  if ($filterString =~ m/^(.*)ORDER BY/g) {          # operator so that we find the first occurrence of either verb.
1108                          # Here we have an ORDER BY verb. Split it off of the filter string.          if ($filterString =~ m/^(.*?)\s*(ORDER BY|LIMIT)/g) {
1109                # Here we have an ORDER BY or LIMIT verb. Split it off of the filter string.
1110                          my $pos = pos $filterString;                          my $pos = pos $filterString;
1111                          $orderClause = substr($filterString, $pos);              $orderClause = $2 . substr($filterString, $pos);
1112                          $filterString = $1;                          $filterString = $1;
1113                  }                  }
1114                  # Add the filter and the join clauses (if any) to the SELECT command.                  # Add the filter and the join clauses (if any) to the SELECT command.
# Line 730  Line 1118 
1118                  if (@joinWhere) {                  if (@joinWhere) {
1119                          $command .= " WHERE " . join(' AND ', @joinWhere);                          $command .= " WHERE " . join(' AND ', @joinWhere);
1120                  }                  }
1121                  # Add the sort clause (if any) to the SELECT command.          # Add the sort or limit clause (if any) to the SELECT command.
1122                  if ($orderClause) {                  if ($orderClause) {
1123                          $command .= " ORDER BY $orderClause";              $command .= " $orderClause";
1124                  }                  }
1125          }          }
1126          Trace("SQL query: $command") if T(2);      Trace("SQL query: $command") if T(SQL => 4);
1127          Trace("PARMS: '" . (join "', '", @params) . "'") if (T(3) && (@params > 0));      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(SQL => 4) && (@params > 0));
1128          my $sth = $dbh->prepare_command($command);          my $sth = $dbh->prepare_command($command);
1129          # Execute it with the parameters bound in.          # Execute it with the parameters bound in.
1130          $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());          $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());
1131        # Now we create the relation map, which enables DBQuery to determine the order, name
1132        # and mapped name for each object in the query.
1133        my @relationMap = ();
1134        for my $mappedName (@mappedNameList) {
1135            push @relationMap, [$mappedName, $mappedNameHash{$mappedName}];
1136        }
1137          # Return the statement object.          # Return the statement object.
1138          my $retVal = DBQuery::_new($self, $sth, @{$objectNames});      my $retVal = DBQuery::_new($self, $sth, \@relationMap);
1139        return $retVal;
1140    }
1141    
1142    =head3 Delete
1143    
1144    C<< my $stats = $erdb->Delete($entityName, $objectID); >>
1145    
1146    Delete an entity instance from the database. The instance is deleted along with all entity and
1147    relationship instances dependent on it. The idea of dependence here is recursive. An object is
1148    always dependent on itself. An object is dependent if it is a 1-to-many or many-to-many
1149    relationship connected to a dependent entity or the "to" entity connected to a 1-to-many
1150    dependent relationship.
1151    
1152    =over 4
1153    
1154    =item entityName
1155    
1156    Name of the entity type for the instance being deleted.
1157    
1158    =item objectID
1159    
1160    ID of the entity instance to be deleted. If the ID contains a wild card character (C<%>),
1161    then it is presumed to by a LIKE pattern.
1162    
1163    =item testFlag
1164    
1165    If TRUE, the delete statements will be traced without being executed.
1166    
1167    =item RETURN
1168    
1169    Returns a statistics object indicating how many records of each particular table were
1170    deleted.
1171    
1172    =back
1173    
1174    =cut
1175    #: Return Type $%;
1176    sub Delete {
1177        # Get the parameters.
1178        my ($self, $entityName, $objectID, $testFlag) = @_;
1179        # Declare the return variable.
1180        my $retVal = Stats->new();
1181        # Get the DBKernel object.
1182        my $db = $self->{_dbh};
1183        # We're going to generate all the paths branching out from the starting entity. One of
1184        # the things we have to be careful about is preventing loops. We'll use a hash to
1185        # determine if we've hit a loop.
1186        my %alreadyFound = ();
1187        # These next lists will serve as our result stack. We start by pushing object lists onto
1188        # the stack, and then popping them off to do the deletes. This means the deletes will
1189        # start with the longer paths before getting to the shorter ones. That, in turn, makes
1190        # sure we don't delete records that might be needed to forge relationships back to the
1191        # original item. We have two lists-- one for TO-relationships, and one for
1192        # FROM-relationships and entities.
1193        my @fromPathList = ();
1194        my @toPathList = ();
1195        # This final hash is used to remember what work still needs to be done. We push paths
1196        # onto the list, then pop them off to extend the paths. We prime it with the starting
1197        # point. Note that we will work hard to insure that the last item on a path in the
1198        # TODO list is always an entity.
1199        my @todoList = ([$entityName]);
1200        while (@todoList) {
1201            # Get the current path.
1202            my $current = pop @todoList;
1203            # Copy it into a list.
1204            my @stackedPath = @{$current};
1205            # Pull off the last item on the path. It will always be an entity.
1206            my $entityName = pop @stackedPath;
1207            # Add it to the alreadyFound list.
1208            $alreadyFound{$entityName} = 1;
1209            # Get the entity data.
1210            my $entityData = $self->_GetStructure($entityName);
1211            # The first task is to loop through the entity's relation. A DELETE command will
1212            # be needed for each of them.
1213            my $relations = $entityData->{Relations};
1214            for my $relation (keys %{$relations}) {
1215                my @augmentedList = (@stackedPath, $relation);
1216                push @fromPathList, \@augmentedList;
1217            }
1218            # Now we need to look for relationships connected to this entity.
1219            my $relationshipList = $self->{_metaData}->{Relationships};
1220            for my $relationshipName (keys %{$relationshipList}) {
1221                my $relationship = $relationshipList->{$relationshipName};
1222                # Check the FROM field. We're only interested if it's us.
1223                if ($relationship->{from} eq $entityName) {
1224                    # Add the path to this relationship.
1225                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1226                    push @fromPathList, \@augmentedList;
1227                    # Check the arity. If it's MM we're done. If it's 1M
1228                    # and the target hasn't been seen yet, we want to
1229                    # stack the entity for future processing.
1230                    if ($relationship->{arity} eq '1M') {
1231                        my $toEntity = $relationship->{to};
1232                        if (! exists $alreadyFound{$toEntity}) {
1233                            # Here we have a new entity that's dependent on
1234                            # the current entity, so we need to stack it.
1235                            my @stackList = (@augmentedList, $toEntity);
1236                            push @fromPathList, \@stackList;
1237                        } else {
1238                            Trace("$toEntity ignored because it occurred previously.") if T(4);
1239                        }
1240                    }
1241                }
1242                # Now check the TO field. In this case only the relationship needs
1243                # deletion.
1244                if ($relationship->{to} eq $entityName) {
1245                    my @augmentedList = (@stackedPath, $entityName, $relationshipName);
1246                    push @toPathList, \@augmentedList;
1247                }
1248            }
1249        }
1250        # Create the first qualifier for the WHERE clause. This selects the
1251        # keys of the primary entity records to be deleted. When we're deleting
1252        # from a dependent table, we construct a join page from the first qualifier
1253        # to the table containing the dependent records to delete.
1254        my $qualifier = ($objectID =~ /%/ ? "LIKE ?" : "= ?");
1255        # We need to make two passes. The first is through the to-list, and
1256        # the second through the from-list. The from-list is second because
1257        # the to-list may need to pass through some of the entities the
1258        # from-list would delete.
1259        my %stackList = ( from_link => \@fromPathList, to_link => \@toPathList );
1260        # Now it's time to do the deletes. We do it in two passes.
1261        for my $keyName ('to_link', 'from_link') {
1262            # Get the list for this key.
1263            my @pathList = @{$stackList{$keyName}};
1264            Trace(scalar(@pathList) . " entries in path list for $keyName.") if T(3);
1265            # Loop through this list.
1266            while (my $path = pop @pathList) {
1267                # Get the table whose rows are to be deleted.
1268                my @pathTables = @{$path};
1269                # Start the DELETE statement. We need to call DBKernel because the
1270                # syntax of a DELETE-USING varies among DBMSs.
1271                my $target = $pathTables[$#pathTables];
1272                my $stmt = $db->SetUsing(@pathTables);
1273                # Now start the WHERE. The first thing is the ID field from the starting table. That
1274                # starting table will either be the entity relation or one of the entity's
1275                # sub-relations.
1276                $stmt .= " WHERE $pathTables[0].id $qualifier";
1277                # Now we run through the remaining entities in the path, connecting them up.
1278                for (my $i = 1; $i <= $#pathTables; $i += 2) {
1279                    # Connect the current relationship to the preceding entity.
1280                    my ($entity, $rel) = @pathTables[$i-1,$i];
1281                    # The style of connection depends on the direction of the relationship.
1282                    $stmt .= " AND $entity.id = $rel.$keyName";
1283                    if ($i + 1 <= $#pathTables) {
1284                        # Here there's a next entity, so connect that to the relationship's
1285                        # to-link.
1286                        my $entity2 = $pathTables[$i+1];
1287                        $stmt .= " AND $rel.to_link = $entity2.id";
1288                    }
1289                }
1290                # Now we have our desired DELETE statement.
1291                if ($testFlag) {
1292                    # Here the user wants to trace without executing.
1293                    Trace($stmt) if T(0);
1294                } else {
1295                    # Here we can delete. Note that the SQL method dies with a confessing
1296                    # if an error occurs, so we just go ahead and do it.
1297                    Trace("Executing delete from $target using '$objectID'.") if T(3);
1298                    my $rv = $db->SQL($stmt, 0, $objectID);
1299                    # Accumulate the statistics for this delete. The only rows deleted
1300                    # are from the target table, so we use its name to record the
1301                    # statistic.
1302                    $retVal->Add($target, $rv);
1303                }
1304            }
1305        }
1306        # Return the result.
1307          return $retVal;          return $retVal;
1308  }  }
1309    
1310  =head3 GetList  =head3 GetList
1311    
1312  C<< my @dbObjects = $database->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
1313    
1314  Return a list of object descriptors for the specified objects as determined by the  Return a list of object descriptors for the specified objects as determined by the
1315  specified filter clause.  specified filter clause.
1316    
1317  This method is essentially the same as L</Get> except it returns a list of objects rather  This method is essentially the same as L</Get> except it returns a list of objects rather
1318  that a query object that can be used to get the results one record at a time.  than a query object that can be used to get the results one record at a time.
   
 =over 4  
1319    
1320  =over 4  =over 4
1321    
# Line 812  Line 1372 
1372    
1373  =head3 ComputeObjectSentence  =head3 ComputeObjectSentence
1374    
1375  C<< my $sentence = $database->ComputeObjectSentence($objectName); >>  C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >>
1376    
1377  Check an object name, and if it is a relationship convert it to a relationship sentence.  Check an object name, and if it is a relationship convert it to a relationship sentence.
1378    
# Line 847  Line 1407 
1407    
1408  =head3 DumpRelations  =head3 DumpRelations
1409    
1410  C<< $database->DumpRelations($outputDirectory); >>  C<< $erdb->DumpRelations($outputDirectory); >>
1411    
1412  Write the contents of all the relations to tab-delimited files in the specified directory.  Write the contents of all the relations to tab-delimited files in the specified directory.
1413  Each file will have the same name as the relation dumped, with an extension of DTX.  Each file will have the same name as the relation dumped, with an extension of DTX.
# Line 889  Line 1449 
1449    
1450  =head3 InsertObject  =head3 InsertObject
1451    
1452  C<< my $ok = $database->InsertObject($objectType, \%fieldHash); >>  C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >>
1453    
1454  Insert an object into the database. The object is defined by a type name and then a hash  Insert an object into the database. The object is defined by a type name and then a hash
1455  of field names to values. Field values in the primary relation are represented by scalars.  of field names to values. Field values in the primary relation are represented by scalars.
# Line 898  Line 1458 
1458  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases
1459  C<ZP_00210270.1> and C<gi|46206278>.  C<ZP_00210270.1> and C<gi|46206278>.
1460    
1461  C<< $database->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>  C<< $erdb->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, feature-type => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>
1462    
1463  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and
1464  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.
1465    
1466  C<< $database->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>  C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>
1467    
1468  =over 4  =over 4
1469    
# Line 1028  Line 1588 
1588    
1589  =head3 LoadTable  =head3 LoadTable
1590    
1591  C<< my %results = $database->LoadTable($fileName, $relationName, $truncateFlag); >>  C<< my %results = $erdb->LoadTable($fileName, $relationName, $truncateFlag); >>
1592    
1593  Load data from a tab-delimited file into a specified table, optionally re-creating the table first.  Load data from a tab-delimited file into a specified table, optionally re-creating the table
1594    first.
1595    
1596  =over 4  =over 4
1597    
# Line 1048  Line 1609 
1609    
1610  =item RETURN  =item RETURN
1611    
1612  Returns a statistical object containing the number of records read and a list of the error messages.  Returns a statistical object containing a list of the error messages.
1613    
1614  =back  =back
1615    
# Line 1059  Line 1620 
1620          # Create the statistical return object.          # Create the statistical return object.
1621          my $retVal = _GetLoadStats();          my $retVal = _GetLoadStats();
1622          # Trace the fact of the load.          # Trace the fact of the load.
1623          Trace("Loading table $relationName from $fileName") if T(1);      Trace("Loading table $relationName from $fileName") if T(2);
1624          # Get the database handle.          # Get the database handle.
1625          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
1626        # Get the input file size.
1627        my $fileSize = -s $fileName;
1628          # Get the relation data.          # Get the relation data.
1629          my $relation = $self->_FindRelation($relationName);          my $relation = $self->_FindRelation($relationName);
1630          # Check the truncation flag.          # Check the truncation flag.
1631          if ($truncateFlag) {          if ($truncateFlag) {
1632                  Trace("Creating table $relationName") if T(1);          Trace("Creating table $relationName") if T(2);
1633            # Compute the row count estimate. We take the size of the load file,
1634            # divide it by the estimated row size, and then multiply by 1.5 to
1635            # leave extra room. We postulate a minimum row count of 1000 to
1636            # prevent problems with incoming empty load files.
1637            my $rowSize = $self->EstimateRowSize($relationName);
1638            my $estimate = FIG::max($fileSize * 1.5 / $rowSize, 1000);
1639                  # Re-create the table without its index.                  # Re-create the table without its index.
1640                  $self->CreateTable($relationName, 0);          $self->CreateTable($relationName, 0, $estimate);
1641            # If this is a pre-index DBMS, create the index here.
1642            if ($dbh->{_preIndex}) {
1643                eval {
1644                    $self->CreateIndex($relationName);
1645                };
1646                if ($@) {
1647                    $retVal->AddMessage($@);
1648                }
1649            }
1650          }          }
         # Determine whether or not this is a primary relation. Primary relations have an extra  
         # field indicating whether or not a given object is new or was loaded from the flat files.  
         my $primary = $self->_IsPrimary($relationName);  
         # Get the number of fields in this relation.  
         my @fieldList = @{$relation->{Fields}};  
         my $fieldCount = @fieldList;  
         # Record the number of expected fields.  
         my $expectedFields = $fieldCount + ($primary ? 1 : 0);  
         # Start a database transaction.  
         $dbh->begin_tran;  
         # Open the relation file. We need to create a cleaned-up copy before loading.  
         open TABLEIN, '<', $fileName;  
         my $tempName = "$fileName.tbl";  
         open TABLEOUT, '>', $tempName;  
         # Loop through the file.  
         while (<TABLEIN>) {  
                 # Chop off the new-line character.  
                 my $record = $_;  
                 chomp $record;  
         # Only proceed if the record is non-blank.  
         if ($record) {  
             # Escape all the backslashes found in the line.  
             $record =~ s/\\/\\\\/g;  
             # Eliminate any trailing tabs.  
             chop $record while substr($record, -1) eq "\t";  
             # If this is a primary relation, add a 0 for the new-record flag (indicating that  
             # this record is not new, but part of the original load).  
             if ($primary) {  
                 $record .= "\t0";  
             }  
             # Write the record.  
             print TABLEOUT "$record\n";  
             # Count the record read.  
             my $count = $retVal->Add('records');  
             my $len = length $record;  
             Trace("Record $count written with $len characters.") if T(4);  
         }  
         }  
         # Close the files.  
         close TABLEIN;  
         close TABLEOUT;  
     Trace("Temporary file $tempName created.") if T(4);  
1651      # Load the table.      # Load the table.
1652          my $rv;          my $rv;
1653          eval {          eval {
1654                  $rv = $dbh->load_table(file => $tempName, tbl => $relationName);          $rv = $dbh->load_table(file => $fileName, tbl => $relationName);
1655          };          };
1656          if (!defined $rv) {          if (!defined $rv) {
1657          $retVal->AddMessage($@) if ($@);          $retVal->AddMessage($@) if ($@);
1658          $retVal->AddMessage("Table load failed for $relationName using $tempName.");          $retVal->AddMessage("Table load failed for $relationName using $fileName.");
1659                  Trace("Table load failed for $relationName.") if T(1);                  Trace("Table load failed for $relationName.") if T(1);
1660          } else {          } else {
1661                  # Here we successfully loaded the table. Trace the number of records loaded.          # Here we successfully loaded the table.
1662                  Trace("$retVal->{records} records read for $relationName.") if T(1);          $retVal->Add("tables");
1663            my $size = -s $fileName;
1664            Trace("$size bytes loaded into $relationName.") if T(2);
1665                  # If we're rebuilding, we need to create the table indexes.                  # If we're rebuilding, we need to create the table indexes.
1666                  if ($truncateFlag) {          if ($truncateFlag && ! $dbh->{_preIndex}) {
1667                          eval {                          eval {
1668                                  $self->CreateIndex($relationName);                                  $self->CreateIndex($relationName);
1669                          };                          };
# Line 1134  Line 1672 
1672                          }                          }
1673                  }                  }
1674          }          }
1675          # Commit the database changes.      # Analyze the table to improve performance.
1676          $dbh->commit_tran;      $dbh->vacuum_it($relationName);
         # Delete the temporary file.  
         unlink $tempName;  
1677          # Return the statistics.          # Return the statistics.
1678          return $retVal;          return $retVal;
1679  }  }
1680    
1681  =head3 GenerateEntity  =head3 GenerateEntity
1682    
1683  C<< my $fieldHash = $database->GenerateEntity($id, $type, \%values); >>  C<< my $fieldHash = $erdb->GenerateEntity($id, $type, \%values); >>
1684    
1685  Generate the data for a new entity instance. This method creates a field hash suitable for  Generate the data for a new entity instance. This method creates a field hash suitable for
1686  passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest  passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest
# Line 1202  Line 1738 
1738    
1739  =head3 GetEntity  =head3 GetEntity
1740    
1741  C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >>  C<< my $entityObject = $erdb->GetEntity($entityType, $ID); >>
1742    
1743  Return an object describing the entity instance with a specified ID.  Return an object describing the entity instance with a specified ID.
1744    
# Line 1238  Line 1774 
1774    
1775  =head3 GetEntityValues  =head3 GetEntityValues
1776    
1777  C<< my @values = GetEntityValues($entityType, $ID, \@fields); >>  C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >>
1778    
1779  Return a list of values from a specified entity instance.  Return a list of values from a specified entity instance.
1780    
# Line 1279  Line 1815 
1815          return @retVal;          return @retVal;
1816  }  }
1817    
1818    =head3 GetAll
1819    
1820    C<< my @list = $erdb->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>
1821    
1822    Return a list of values taken from the objects returned by a query. The first three
1823    parameters correspond to the parameters of the L</Get> method. The final parameter is
1824    a list of the fields desired from each record found by the query. The field name
1825    syntax is the standard syntax used for fields in the B<ERDB> system--
1826    B<I<objectName>(I<fieldName>)>-- where I<objectName> is the name of the relevant entity
1827    or relationship and I<fieldName> is the name of the field.
1828    
1829    The list returned will be a list of lists. Each element of the list will contain
1830    the values returned for the fields specified in the fourth parameter. If one of the
1831    fields specified returns multiple values, they are flattened in with the rest. For
1832    example, the following call will return a list of the features in a particular
1833    spreadsheet cell, and each feature will be represented by a list containing the
1834    feature ID followed by all of its aliases.
1835    
1836    C<< $query = $erdb->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>
1837    
1838    =over 4
1839    
1840    =item objectNames
1841    
1842    List containing the names of the entity and relationship objects to be retrieved.
1843    
1844    =item filterClause
1845    
1846    WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can
1847    be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form
1848    B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the
1849    parameter list as additional parameters. The fields in a filter clause can come from primary
1850    entity relations, relationship relations, or secondary entity relations; however, all of the
1851    entities and relationships involved must be included in the list of object names.
1852    
1853    =item parameterList
1854    
1855    List of the parameters to be substituted in for the parameters marks in the filter clause.
1856    
1857    =item fields
1858    
1859    List of the fields to be returned in each element of the list returned.
1860    
1861    =item count
1862    
1863    Maximum number of records to return. If omitted or 0, all available records will be returned.
1864    
1865    =item RETURN
1866    
1867    Returns a list of list references. Each element of the return list contains the values for the
1868    fields specified in the B<fields> parameter.
1869    
1870    =back
1871    
1872    =cut
1873    #: Return Type @@;
1874    sub GetAll {
1875        # Get the parameters.
1876        my ($self, $objectNames, $filterClause, $parameterList, $fields, $count) = @_;
1877        # Translate the parameters from a list reference to a list. If the parameter
1878        # list is a scalar we convert it into a singleton list.
1879        my @parmList = ();
1880        if (ref $parameterList eq "ARRAY") {
1881            @parmList = @{$parameterList};
1882        } else {
1883            push @parmList, $parameterList;
1884        }
1885        # Insure the counter has a value.
1886        if (!defined $count) {
1887            $count = 0;
1888        }
1889        # Add the row limit to the filter clause.
1890        if ($count > 0) {
1891            $filterClause .= " LIMIT $count";
1892        }
1893        # Create the query.
1894        my $query = $self->Get($objectNames, $filterClause, @parmList);
1895        # Set up a counter of the number of records read.
1896        my $fetched = 0;
1897        # Loop through the records returned, extracting the fields. Note that if the
1898        # counter is non-zero, we stop when the number of records read hits the count.
1899        my @retVal = ();
1900        while (($count == 0 || $fetched < $count) && (my $row = $query->Fetch())) {
1901            my @rowData = $row->Values($fields);
1902            push @retVal, \@rowData;
1903            $fetched++;
1904        }
1905        # Return the resulting list.
1906        return @retVal;
1907    }
1908    
1909    =head3 EstimateRowSize
1910    
1911    C<< my $rowSize = $erdb->EstimateRowSize($relName); >>
1912    
1913    Estimate the row size of the specified relation. The estimated row size is computed by adding
1914    up the average length for each data type.
1915    
1916    =over 4
1917    
1918    =item relName
1919    
1920    Name of the relation whose estimated row size is desired.
1921    
1922    =item RETURN
1923    
1924    Returns an estimate of the row size for the specified relation.
1925    
1926    =back
1927    
1928    =cut
1929    #: Return Type $;
1930    sub EstimateRowSize {
1931        # Get the parameters.
1932        my ($self, $relName) = @_;
1933        # Declare the return variable.
1934        my $retVal = 0;
1935        # Find the relation descriptor.
1936        my $relation = $self->_FindRelation($relName);
1937        # Get the list of fields.
1938        for my $fieldData (@{$relation->{Fields}}) {
1939            # Get the field type and add its length.
1940            my $fieldLen = $TypeTable{$fieldData->{type}}->{avgLen};
1941            $retVal += $fieldLen;
1942        }
1943        # Return the result.
1944        return $retVal;
1945    }
1946    
1947    =head3 GetFieldTable
1948    
1949    C<< my $fieldHash = $self->GetFieldTable($objectnName); >>
1950    
1951    Get the field structure for a specified entity or relationship.
1952    
1953    =over 4
1954    
1955    =item objectName
1956    
1957    Name of the desired entity or relationship.
1958    
1959    =item RETURN
1960    
1961    The table containing the field descriptors for the specified object.
1962    
1963    =back
1964    
1965    =cut
1966    
1967    sub GetFieldTable {
1968        # Get the parameters.
1969        my ($self, $objectName) = @_;
1970        # Get the descriptor from the metadata.
1971        my $objectData = $self->_GetStructure($objectName);
1972        # Return the object's field table.
1973        return $objectData->{Fields};
1974    }
1975    
1976    =head3 GetUsefulCrossValues
1977    
1978    C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >>
1979    
1980    Return a list of the useful attributes that would be returned by a B<Cross> call
1981    from an entity of the source entity type through the specified relationship. This
1982    means it will return the fields of the target entity type and the intersection data
1983    fields in the relationship. Only primary table fields are returned. In other words,
1984    the field names returned will be for fields where there is always one and only one
1985    value.
1986    
1987    =over 4
1988    
1989    =item sourceEntity
1990    
1991    Name of the entity from which the relationship crossing will start.
1992    
1993    =item relationship
1994    
1995    Name of the relationship being crossed.
1996    
1997    =item RETURN
1998    
1999    Returns a list of field names in Sprout field format (I<objectName>C<(>I<fieldName>C<)>.
2000    
2001    =back
2002    
2003    =cut
2004    #: Return Type @;
2005    sub GetUsefulCrossValues {
2006        # Get the parameters.
2007        my ($self, $sourceEntity, $relationship) = @_;
2008        # Declare the return variable.
2009        my @retVal = ();
2010        # Determine the target entity for the relationship. This is whichever entity is not
2011        # the source entity. So, if the source entity is the FROM, we'll get the name of
2012        # the TO, and vice versa.
2013        my $relStructure = $self->_GetStructure($relationship);
2014        my $targetEntityType = ($relStructure->{from} eq $sourceEntity ? "to" : "from");
2015        my $targetEntity = $relStructure->{$targetEntityType};
2016        # Get the field table for the entity.
2017        my $entityFields = $self->GetFieldTable($targetEntity);
2018        # The field table is a hash. The hash key is the field name. The hash value is a structure.
2019        # For the entity fields, the key aspect of the target structure is that the {relation} value
2020        # must match the entity name.
2021        my @fieldList = map { "$targetEntity($_)" } grep { $entityFields->{$_}->{relation} eq $targetEntity }
2022                            keys %{$entityFields};
2023        # Push the fields found onto the return variable.
2024        push @retVal, sort @fieldList;
2025        # Get the field table for the relationship.
2026        my $relationshipFields = $self->GetFieldTable($relationship);
2027        # Here we have a different rule. We want all the fields other than "from-link" and "to-link".
2028        # This may end up being an empty set.
2029        my @fieldList2 = map { "$relationship($_)" } grep { $_ ne "from-link" && $_ ne "to-link" }
2030                            keys %{$relationshipFields};
2031        # Push these onto the return list.
2032        push @retVal, sort @fieldList2;
2033        # Return the result.
2034        return @retVal;
2035    }
2036    
2037  =head2 Internal Utility Methods  =head2 Internal Utility Methods
2038    
2039  =head3 GetLoadStats  =head3 GetLoadStats
# Line 1290  Line 2045 
2045  =cut  =cut
2046    
2047  sub _GetLoadStats {  sub _GetLoadStats {
2048          return Stats->new('records');      return Stats->new();
2049  }  }
2050    
2051  =head3 GenerateFields  =head3 GenerateFields
# Line 1485  Line 2240 
2240          return $objectData->{Relations};          return $objectData->{Relations};
2241  }  }
2242    
 =head3 GetFieldTable  
   
 Get the field structure for a specified entity or relationship.  
   
 This is an instance method.  
   
 =over 4  
   
 =item objectName  
   
 Name of the desired entity or relationship.  
   
 =item RETURN  
   
 The table containing the field descriptors for the specified object.  
   
 =back  
   
 =cut  
   
 sub _GetFieldTable {  
         # Get the parameters.  
         my ($self, $objectName) = @_;  
         # Get the descriptor from the metadata.  
         my $objectData = $self->_GetStructure($objectName);  
         # Return the object's field table.  
         return $objectData->{Fields};  
 }  
   
2243  =head3 ValidateFieldNames  =head3 ValidateFieldNames
2244    
2245  Determine whether or not the field names are valid. A description of the problems with the names  Determine whether or not the field names are valid. A description of the problems with the names
# Line 1654  Line 2380 
2380  sub _LoadMetaData {  sub _LoadMetaData {
2381          # Get the parameters.          # Get the parameters.
2382          my ($filename) = @_;          my ($filename) = @_;
2383        Trace("Reading Sprout DBD from $filename.") if T(2);
2384          # Slurp the XML file into a variable. Extensive use of options is used to insure we          # Slurp the XML file into a variable. Extensive use of options is used to insure we
2385          # get the exact structure we want.          # get the exact structure we want.
2386          my $metadata = XML::Simple::XMLin($filename,          my $metadata = XML::Simple::XMLin($filename,
# Line 1681  Line 2408 
2408          for my $entityName (keys %{$entityList}) {          for my $entityName (keys %{$entityList}) {
2409                  my $entityStructure = $entityList->{$entityName};                  my $entityStructure = $entityList->{$entityName};
2410                  #                  #
2411                  # The first step is to run creating all the entity's default values. For C<Field> elements,          # The first step is to create all the entity's default values. For C<Field> elements,
2412                  # the relation name must be added where it is not specified. For relationships,                  # the relation name must be added where it is not specified. For relationships,
2413                  # the B<from-link> and B<to-link> fields must be inserted, and for entities an B<id>                  # the B<from-link> and B<to-link> fields must be inserted, and for entities an B<id>
2414                  # field must be added to each relation. Finally, each field will have a C<PrettySort> attribute                  # field must be added to each relation. Finally, each field will have a C<PrettySort> attribute
# Line 1860  Line 2587 
2587                  my @fromList = ();                  my @fromList = ();
2588                  my @toList = ();                  my @toList = ();
2589                  my @bothList = ();                  my @bothList = ();
2590                  Trace("Join table build for $entityName.") if T(3);          Trace("Join table build for $entityName.") if T(metadata => 4);
2591                  for my $relationshipName (keys %{$relationshipList}) {                  for my $relationshipName (keys %{$relationshipList}) {
2592                          my $relationship = $relationshipList->{$relationshipName};                          my $relationship = $relationshipList->{$relationshipName};
2593                          # Determine if this relationship has our entity in one of its link fields.                          # Determine if this relationship has our entity in one of its link fields.
2594                          my $fromEntity = $relationship->{from};                          my $fromEntity = $relationship->{from};
2595                          my $toEntity = $relationship->{to};                          my $toEntity = $relationship->{to};
2596                          Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(3);              Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(4);
2597                          if ($fromEntity eq $entityName) {                          if ($fromEntity eq $entityName) {
2598                                  if ($toEntity eq $entityName) {                                  if ($toEntity eq $entityName) {
2599                                          # Here the relationship is recursive.                                          # Here the relationship is recursive.
2600                                          push @bothList, $relationshipName;                                          push @bothList, $relationshipName;
2601                                          Trace("Relationship $relationshipName put in both-list.") if T(3);                      Trace("Relationship $relationshipName put in both-list.") if T(metadata => 4);
2602                                  } else {                                  } else {
2603                                          # Here the relationship comes from the entity.                                          # Here the relationship comes from the entity.
2604                                          push @fromList, $relationshipName;                                          push @fromList, $relationshipName;
2605                                          Trace("Relationship $relationshipName put in from-list.") if T(3);                      Trace("Relationship $relationshipName put in from-list.") if T(metadata => 4);
2606                                  }                                  }
2607                          } elsif ($toEntity eq $entityName) {                          } elsif ($toEntity eq $entityName) {
2608                                  # Here the relationship goes to the entity.                                  # Here the relationship goes to the entity.
2609                                  push @toList, $relationshipName;                                  push @toList, $relationshipName;
2610                                  Trace("Relationship $relationshipName put in to-list.") if T(3);                  Trace("Relationship $relationshipName put in to-list.") if T(metadata => 4);
2611                          }                          }
2612                  }                  }
2613                  # Create the nonrecursive joins. Note that we build two hashes for running                  # Create the nonrecursive joins. Note that we build two hashes for running
# Line 1896  Line 2623 
2623                                  # Create joins between the entity and this relationship.                                  # Create joins between the entity and this relationship.
2624                                  my $linkField = "$relationshipName.${linkType}_link";                                  my $linkField = "$relationshipName.${linkType}_link";
2625                                  my $joinClause = "$entityName.id = $linkField";                                  my $joinClause = "$entityName.id = $linkField";
2626                                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(4);                  Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(metadata => 4);
2627                                  $joinTable{"$entityName/$relationshipName"} = $joinClause;                                  $joinTable{"$entityName/$relationshipName"} = $joinClause;
2628                                  $joinTable{"$relationshipName/$entityName"} = $joinClause;                                  $joinTable{"$relationshipName/$entityName"} = $joinClause;
2629                                  # Create joins between this relationship and the other relationships.                                  # Create joins between this relationship and the other relationships.
# Line 1917  Line 2644 
2644                                                          # relationship and itself are prohibited.                                                          # relationship and itself are prohibited.
2645                                                          my $relJoinClause = "$otherName.${otherType}_link = $linkField";                                                          my $relJoinClause = "$otherName.${otherType}_link = $linkField";
2646                                                          $joinTable{$joinKey} = $relJoinClause;                                                          $joinTable{$joinKey} = $relJoinClause;
2647                                                          Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(4);                              Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(metadata => 4);
2648                                                  }                                                  }
2649                                          }                                          }
2650                                  }                                  }
# Line 1926  Line 2653 
2653                                  # relationship can only be ambiguous with another recursive relationship,                                  # relationship can only be ambiguous with another recursive relationship,
2654                                  # and the incoming relationship from the outer loop is never recursive.                                  # and the incoming relationship from the outer loop is never recursive.
2655                                  for my $otherName (@bothList) {                                  for my $otherName (@bothList) {
2656                                          Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(3);                      Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(metadata => 4);
2657                                          # Join from the left.                                          # Join from the left.
2658                                          $joinTable{"$relationshipName/$otherName"} =                                          $joinTable{"$relationshipName/$otherName"} =
2659                                                  "$linkField = $otherName.from_link";                                                  "$linkField = $otherName.from_link";
# Line 1941  Line 2668 
2668                  # rise to situations where we can't create the path we want; however, it is always                  # rise to situations where we can't create the path we want; however, it is always
2669                  # possible to get the same effect using multiple queries.                  # possible to get the same effect using multiple queries.
2670                  for my $relationshipName (@bothList) {                  for my $relationshipName (@bothList) {
2671                          Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(3);              Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(metadata => 4);
2672                          # Join to the entity from each direction.                          # Join to the entity from each direction.
2673                          $joinTable{"$entityName/$relationshipName"} =                          $joinTable{"$entityName/$relationshipName"} =
2674                                  "$entityName.id = $relationshipName.from_link";                                  "$entityName.id = $relationshipName.from_link";
# Line 1992  Line 2719 
2719          # index descriptor does not exist, it will be created automatically so we can add          # index descriptor does not exist, it will be created automatically so we can add
2720          # the field to it.          # the field to it.
2721          unshift @{$newIndex->{IndexFields}}, $firstField;          unshift @{$newIndex->{IndexFields}}, $firstField;
2722        # If this is a one-to-many relationship, the "To" index is unique.
2723        if ($relationshipStructure->{arity} eq "1M" && $indexKey eq "To") {
2724            $newIndex->{Unique} = 'true';
2725        }
2726          # Add the index to the relation.          # Add the index to the relation.
2727          _AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex);          _AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex);
2728  }  }
# Line 2083  Line 2814 
2814                  # Here we have a field list. Loop through its fields.                  # Here we have a field list. Loop through its fields.
2815                  my $fieldStructures = $structure->{Fields};                  my $fieldStructures = $structure->{Fields};
2816                  for my $fieldName (keys %{$fieldStructures}) {                  for my $fieldName (keys %{$fieldStructures}) {
2817                Trace("Processing field $fieldName of $defaultRelationName.") if T(4);
2818                          my $fieldData = $fieldStructures->{$fieldName};                          my $fieldData = $fieldStructures->{$fieldName};
2819                          # Get the field type.                          # Get the field type.
2820                          my $type = $fieldData->{type};                          my $type = $fieldData->{type};

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.39

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3