[Bio] / Sprout / ERDB.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDB.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5, Tue Apr 5 05:17:01 2005 UTC revision 1.22, Wed Sep 14 09:56:58 2005 UTC
# Line 1  Line 1 
1  package ERDB;  package ERDB;
2    
3          use strict;          use strict;
         use Carp;  
4          use Tracer;          use Tracer;
5          use DBKernel;      use DBrtns;
6          use Data::Dumper;          use Data::Dumper;
7          use XML::Simple;          use XML::Simple;
8          use DBQuery;          use DBQuery;
9          use DBObject;          use DBObject;
10          use Stats;          use Stats;
11          use Time::HiRes qw(gettimeofday);          use Time::HiRes qw(gettimeofday);
12        use FIG;
13    
14  =head1 Entity-Relationship Database Package  =head1 Entity-Relationship Database Package
15    
# Line 33  Line 33 
33  relation that contains two fields-- the feature ID (C<id>) and the alias name (C<alias>).  relation that contains two fields-- the feature ID (C<id>) and the alias name (C<alias>).
34  The B<FEATURE> entity also contains an optional virulence number. This is implemented  The B<FEATURE> entity also contains an optional virulence number. This is implemented
35  as a separate relation C<FeatureVirulence> which contains an ID (C<id>) and a virulence number  as a separate relation C<FeatureVirulence> which contains an ID (C<id>) and a virulence number
36  (C<virulence>). If the virulence of a feature I<ABC> is known to be 6, there will be one row in the  (C<virulence>). If the virulence of a feature I<ABC> is known to be 6, there will be one row in
37  C<FeatureVirulence> relation possessing the value I<ABC> as its ID and 6 as its virulence number.  the C<FeatureVirulence> relation possessing the value I<ABC> as its ID and 6 as its virulence
38  If the virulence of I<ABC> is not known, there will not be any rows for it in C<FeatureVirulence>.  number. If the virulence of I<ABC> is not known, there will not be any rows for it in
39    C<FeatureVirulence>.
40    
41  Entities are connected by binary relationships implemented using single relations possessing the  Entities are connected by binary relationships implemented using single relations possessing the
42  same name as the relationship itself and that has an I<arity> of 1-to-1 (C<11>), 1-to-many (C<1M>),  same name as the relationship itself and that has an I<arity> of 1-to-1 (C<11>), 1-to-many (C<1M>),
# Line 70  Line 71 
71  is described in the L</GenerateEntity> and L</GenerateConnection> methods, though it is not yet  is described in the L</GenerateEntity> and L</GenerateConnection> methods, though it is not yet
72  fully implemented.  fully implemented.
73    
74    =head2 XML Database Description
75    
76    =head3 Data Types
77    
78    The ERDB system supports the following data types. Note that there are numerous string
79    types depending on the maximum length. Some database packages limit the total number of
80    characters you have in an index key; to insure the database works in all environments,
81    the type of string should be the shortest one possible that supports all the known values.
82    
83    =over 4
84    
85    =item char
86    
87    single ASCII character
88    
89    =item int
90    
91    32-bit signed integer
92    
93    =item date
94    
95    64-bit unsigned integer, representing a PERL date/time value
96    
97    =item text
98    
99    long string; Text fields cannot be used in indexes or sorting and do not support the
100    normal syntax of filter clauses, but can be up to a billion characters in length
101    
102    =item float
103    
104    double-precision floating-point number
105    
106    =item boolean
107    
108    single-bit numeric value; The value is stored as a 16-bit signed integer (for
109    compatibility with certain database packages), but the only values supported are
110    0 and 1.
111    
112    =item key-string
113    
114    variable-length string, maximum 40 characters
115    
116    =item name-string
117    
118    variable-length string, maximum 80 characters
119    
120    =item medium-string
121    
122    variable-length string, maximum 160 characters
123    
124    =item string
125    
126    variable-length string, maximum 255 characters
127    
128    =back
129    
130    =head3 Global Tags
131    
132    The entire database definition must be inside a B<Database> tag. The display name of
133    the database is given by the text associated with the B<Title> tag. The display name
134    is only used in the automated documentation. It has no other effect. The entities and
135    relationships are listed inside the B<Entities> and B<Relationships> tags,
136    respectively. None of these tags have attributes.
137    
138        <Database>
139            <Title>... display title here...</Title>
140            <Entities>
141                ... entity definitions here ...
142            </Entities>
143            <Relationships>
144                ... relationship definitions here...
145            </Relationships>
146        </Database>
147    
148    Entities, relationships, indexes, and fields all allow a text tag called B<Notes>.
149    The text inside the B<Notes> tag contains comments that will appear when the database
150    documentation is generated. Within a B<Notes> tag, you may use C<[i]> and C<[/i]> for
151    italics, C<[b]> and C<[/b]> for bold, and C<[p]> for a new paragraph.
152    
153    =head3 Fields
154    
155    Both entities and relationships have fields described by B<Field> tags. A B<Field>
156    tag can have B<Notes> associated with it. The complete set of B<Field> tags for an
157    object must be inside B<Fields> tags.
158    
159        <Entity ... >
160            <Fields>
161                ... Field tags ...
162            </Fields>
163        </Entity>
164    
165    The attributes for the B<Field> tag are as follows.
166    
167    =over 4
168    
169    =item name
170    
171    Name of the field. The field name should contain only letters, digits, and hyphens (C<->),
172    and the first character should be a letter. Most underlying databases are case-insensitive
173    with respect to field names, so a best practice is to use lower-case letters only.
174    
175    =item type
176    
177    Data type of the field. The legal data types are given above.
178    
179    =item relation
180    
181    Name of the relation containing the field. This should only be specified for entity
182    fields. The ERDB system does not support optional fields or multi-occurring fields
183    in the primary relation of an entity. Instead, they are put into secondary relations.
184    So, for example, in the C<Genome> entity, the C<group-name> field indicates a special
185    grouping used to select a subset of the genomes. A given genome may not be in any
186    groups or may be in multiple groups. Therefore, C<group-name> specifies a relation
187    value. The relation name specified must be a valid table name. By convention, it is
188    usually the entity name followed by a qualifying word (e.g. C<GenomeGroup>). In an
189    entity, the fields without a relation attribute are said to belong to the
190    I<primary relation>. This relation has the same name as the entity itself.
191    
192    =back
193    
194    =head3 Indexes
195    
196    An entity can have multiple alternate indexes associated with it. The fields must
197    be from the primary relation. The alternate indexes assist in ordering results
198    from a query. A relationship can have up to two indexes-- a I<to-index> and a
199    I<from-index>. These order the results when crossing the relationship. For
200    example, in the relationship C<HasContig> from C<Genome> to C<Contig>, the
201    from-index would order the contigs of a genome, and the to-index would order
202    the genomes of a contig. A relationship's index must specify only fields in
203    the relationship.
204    
205    The indexes for an entity must be listed inside the B<Indexes> tag. The from-index
206    of a relationship is specified using the B<FromIndex> tag; the to-index is specified
207    using the B<ToIndex> tag.
208    
209    Each index can contain a B<Notes> tag. In addition, it will have an B<IndexFields>
210    tag containing the B<IndexField> tags. These specify, in order, the fields used in
211    the index. The attributes of an B<IndexField> tag are as follows.
212    
213    =over 4
214    
215    =item name
216    
217    Name of the field.
218    
219    =item order
220    
221    Sort order of the field-- C<ascending> or C<descending>.
222    
223    =back
224    
225    The B<Index>, B<FromIndex>, and B<ToIndex> tags themselves have no attributes.
226    
227    =head3 Object and Field Names
228    
229    By convention entity and relationship names use capital casing (e.g. C<Genome> or
230    C<HasRegionsIn>). Most underlying databases, however, are aggressively case-insensitive
231    with respect to relation names, converting them internally to all-upper case or
232    all-lower case.
233    
234    If syntax or parsing errors occur when you try to load or use an ERDB database, the
235    most likely reason is that one of your objects has an SQL reserved word as its name.
236    The list of SQL reserved words keeps increasing; however, most are unlikely to show
237    up as a noun or declarative verb phrase. The exceptions are C<Group>, C<User>,
238    C<Table>, C<Index>, C<Object>, C<Date>, C<Number>, C<Update>, C<Time>, C<Percent>,
239    C<Memo>, C<Order>, and C<Sum>. This problem can crop up in field names as well.
240    
241    Every entity has a field called C<id> that acts as its primary key. Every relationship
242    has fields called C<from-link> and C<to-link> that contain copies of the relevant
243    entity IDs. These are essentially ERDB's reserved words, and should not be used
244    for user-defined field names.
245    
246    =head3 Entities
247    
248    An entity is described by the B<Entity> tag. The entity can contain B<Notes>, an
249    B<Indexes> tag containing one or more secondary indexes, and a B<Fields> tag
250    containing one or more fields. The attributes of the B<Entity> tag are as follows.
251    
252    =over 4
253    
254    =item name
255    
256    Name of the entity. The entity name, by convention, uses capital casing (e.g. C<Genome>
257    or C<GroupBlock>) and should be a noun or noun phrase.
258    
259    =item keyType
260    
261    Data type of the primary key. The primary key is always named C<id>.
262    
263    =back
264    
265    =head3 Relationships
266    
267    A relationship is described by the C<Relationship> tag. Within a relationship,
268    there can be a C<Notes> tag, a C<Fields> tag containing the intersection data
269    fields, a C<FromIndex> tag containing the from-index, and a C<ToIndex> tag containing
270    the to-index.
271    
272    The C<Relationship> tag has the following attributes.
273    
274    =over 4
275    
276    =item name
277    
278    Name of the relationship. The relationship name, by convention, uses capital casing
279    (e.g. C<ContainsRegionIn> or C<HasContig>), and should be a declarative verb
280    phrase, designed to fit between the from-entity and the to-entity (e.g.
281    Block C<ContainsRegionIn> Genome).
282    
283    =item from
284    
285    Name of the entity from which the relationship starts.
286    
287    =item to
288    
289    Name of the entity to which the relationship proceeds.
290    
291    =item arity
292    
293    Relationship type: C<1M> for one-to-many and C<MM> for many-to-many.
294    
295    =back
296    
297  =cut  =cut
298    
299  # GLOBALS  # GLOBALS
# Line 77  Line 301 
301  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.  # Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string.
302  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field  # "maxLen" is the maximum permissible length of the incoming string data used to populate a field
303  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation  # of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation
304   #string is specified in the field definition.  # string is specified in the field definition. "avgLen" is the average byte length for estimating
305  my %TypeTable = ( char =>        { sqlType => 'CHAR(1)',                        maxLen => 1,                    dataGen => "StringGen('A')" },  # record sizes.
306                                    int =>         { sqlType => 'INTEGER',                        maxLen => 20,                   dataGen => "IntGen(0, 99999999)" },  my %TypeTable = ( char =>    { sqlType => 'CHAR(1)',            maxLen => 1,            avgLen =>   1, dataGen => "StringGen('A')" },
307                                    string =>  { sqlType => 'VARCHAR(255)',               maxLen => 255,                  dataGen => "StringGen(IntGen(10,250))" },                    int =>     { sqlType => 'INTEGER',            maxLen => 20,           avgLen =>   4, dataGen => "IntGen(0, 99999999)" },
308                                    text =>        { sqlType => 'TEXT',                           maxLen => 1000000000,   dataGen => "StringGen(IntGen(80,1000))" },                    string =>  { sqlType => 'VARCHAR(255)',       maxLen => 255,          avgLen => 100, dataGen => "StringGen(IntGen(10,250))" },
309                                    date =>        { sqlType => 'BIGINT',                         maxLen => 80,                   dataGen => "DateGen(-7, 7, IntGen(0,1400))" },                    text =>    { sqlType => 'TEXT',               maxLen => 1000000000,   avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" },
310                                    float =>       { sqlType => 'DOUBLE PRECISION',       maxLen => 40,                   dataGen => "FloatGen(0.0, 100.0)" },                    date =>    { sqlType => 'BIGINT',             maxLen => 80,           avgLen =>   8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" },
311                                    boolean => { sqlType => 'SMALLINT',                   maxLen => 1,                    dataGen => "IntGen(0, 1)" },                    float =>   { sqlType => 'DOUBLE PRECISION',   maxLen => 40,           avgLen =>   8, dataGen => "FloatGen(0.0, 100.0)" },
312                      boolean => { sqlType => 'SMALLINT',           maxLen => 1,            avgLen =>   2, dataGen => "IntGen(0, 1)" },
313                               'key-string' =>                               'key-string' =>
314                                                           { sqlType => 'VARCHAR(40)',            maxLen => 40,                   dataGen => "StringGen(IntGen(10,40))" },                               { sqlType => 'VARCHAR(40)',        maxLen => 40,           avgLen =>  10, dataGen => "StringGen(IntGen(10,40))" },
315                                   'name-string' =>                                   'name-string' =>
316                                                           { sqlType => 'VARCHAR(80)',            maxLen => 80,                   dataGen => "StringGen(IntGen(10,80))" },                               { sqlType => 'VARCHAR(80)',        maxLen => 80,           avgLen =>  40, dataGen => "StringGen(IntGen(10,80))" },
317                                   'medium-string' =>                                   'medium-string' =>
318                                                           { sqlType => 'VARCHAR(160)',           maxLen => 160,                  dataGen => "StringGen(IntGen(10,160))" },                               { sqlType => 'VARCHAR(160)',       maxLen => 160,          avgLen =>  40, dataGen => "StringGen(IntGen(10,160))" },
319                                  );                                  );
320    
321  # Table translating arities into natural language.  # Table translating arities into natural language.
# Line 140  Line 365 
365                                   _metaData => $metaData                                   _metaData => $metaData
366                             };                             };
367          # Bless and return it.          # Bless and return it.
368          bless $self;      bless $self, $class;
369          return $self;          return $self;
370  }  }
371    
372  =head3 ShowMetaData  =head3 ShowMetaData
373    
374  C<< $database->ShowMetaData($fileName); >>  C<< $erdb->ShowMetaData($fileName); >>
375    
376  This method outputs a description of the database. This description can be used to help users create  This method outputs a description of the database. This description can be used to help users create
377  the data to be loaded into the relations.  the data to be loaded into the relations.
# Line 278  Line 503 
503          print HTMLOUT _OpenTable("Join Table", "Source", "Target", "Join Condition");          print HTMLOUT _OpenTable("Join Table", "Source", "Target", "Join Condition");
504          # Loop through the joins.          # Loop through the joins.
505          my $joinTable = $metadata->{Joins};          my $joinTable = $metadata->{Joins};
506          for my $joinKey (sort keys %{$joinTable}) {      my @joinKeys = keys %{$joinTable};
507        for my $joinKey (sort @joinKeys) {
508                  # Separate out the source, the target, and the join clause.                  # Separate out the source, the target, and the join clause.
509                  $joinKey =~ m!([^/]*)/(.*)$!;          $joinKey =~ m!^([^/]+)/(.+)$!;
510                  my ($source, $target, $clause) = ($self->ComputeObjectSentence($1),          my ($sourceRelation, $targetRelation) = ($1, $2);
511                                                                                    $self->ComputeObjectSentence($2),          Trace("Join with key $joinKey is from $sourceRelation to $targetRelation.") if T(4);
512                                                                                    $joinTable->{$joinKey});          my $source = $self->ComputeObjectSentence($sourceRelation);
513            my $target = $self->ComputeObjectSentence($targetRelation);
514            my $clause = $joinTable->{$joinKey};
515                  # Display them in a table row.                  # Display them in a table row.
516                  print HTMLOUT "<tr><td>$source</td><td>$target</td><td>$clause</td></tr>\n";                  print HTMLOUT "<tr><td>$source</td><td>$target</td><td>$clause</td></tr>\n";
517          }          }
# Line 298  Line 526 
526    
527  =head3 DumpMetaData  =head3 DumpMetaData
528    
529  C<< $database->DumpMetaData(); >>  C<< $erdb->DumpMetaData(); >>
530    
531  Return a dump of the metadata structure.  Return a dump of the metadata structure.
532    
# Line 313  Line 541 
541    
542  =head3 CreateTables  =head3 CreateTables
543    
544  C<< $datanase->CreateTables(); >>  C<< $erdb->CreateTables(); >>
545    
546  This method creates the tables for the database from the metadata structure loaded by the  This method creates the tables for the database from the metadata structure loaded by the
547  constructor. It is expected this function will only be used on rare occasions, when the  constructor. It is expected this function will only be used on rare occasions, when the
# Line 328  Line 556 
556          my $metadata = $self->{_metaData};          my $metadata = $self->{_metaData};
557          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
558          # Loop through the entities.          # Loop through the entities.
559          while (my ($entityName, $entityData) = each %{$metadata->{Entities}}) {      my $entityHash = $metadata->{Entities};
560        for my $entityName (keys %{$entityHash}) {
561            my $entityData = $entityHash->{$entityName};
562                  # Tell the user what we're doing.                  # Tell the user what we're doing.
563                  Trace("Creating relations for entity $entityName.") if T(1);                  Trace("Creating relations for entity $entityName.") if T(1);
564                  # Loop through the entity's relations.                  # Loop through the entity's relations.
# Line 349  Line 579 
579    
580  =head3 CreateTable  =head3 CreateTable
581    
582  C<< $database->CreateTable($tableName, $indexFlag); >>  C<< $erdb->CreateTable($tableName, $indexFlag, $estimatedRows); >>
583    
584  Create the table for a relation and optionally create its indexes.  Create the table for a relation and optionally create its indexes.
585    
# Line 359  Line 589 
589    
590  Name of the relation (which will also be the table name).  Name of the relation (which will also be the table name).
591    
592  =item $indexFlag  =item indexFlag
593    
594  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,  TRUE if the indexes for the relation should be created, else FALSE. If FALSE,
595  L</CreateIndexes> must be called later to bring the indexes into existence.  L</CreateIndexes> must be called later to bring the indexes into existence.
596    
597    =item estimatedRows (optional)
598    
599    If specified, the estimated maximum number of rows for the relation. This
600    information allows the creation of tables using storage engines that are
601    faster but require size estimates, such as MyISAM.
602    
603  =back  =back
604    
605  =cut  =cut
606    
607  sub CreateTable {  sub CreateTable {
608          # Get the parameters.          # Get the parameters.
609          my ($self, $relationName, $indexFlag) = @_;      my ($self, $relationName, $indexFlag, $estimatedRows) = @_;
610          # Get the database handle.          # Get the database handle.
611          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
612          # Get the relation data and determine whether or not the relation is primary.          # Get the relation data and determine whether or not the relation is primary.
# Line 394  Line 630 
630          # Insure the table is not already there.          # Insure the table is not already there.
631          $dbh->drop_table(tbl => $relationName);          $dbh->drop_table(tbl => $relationName);
632          Trace("Table $relationName dropped.") if T(2);          Trace("Table $relationName dropped.") if T(2);
633        # If there are estimated rows, create an estimate so we can take advantage of
634        # faster DB technologies.
635        my $estimation = undef;
636        if ($estimatedRows) {
637            $estimation = [$self->EstimateRowSize($relationName), $estimatedRows];
638        }
639          # Create the table.          # Create the table.
640          Trace("Creating table $relationName: $fieldThing") if T(2);          Trace("Creating table $relationName: $fieldThing") if T(2);
641          $dbh->create_table(tbl => $relationName, flds => $fieldThing);      $dbh->create_table(tbl => $relationName, flds => $fieldThing, estimates => $estimation);
642          Trace("Relation $relationName created in database.") if T(2);          Trace("Relation $relationName created in database.") if T(2);
643          # If we want to build the indexes, we do it here.          # If we want to build the indexes, we do it here.
644          if ($indexFlag) {          if ($indexFlag) {
# Line 406  Line 648 
648    
649  =head3 CreateIndex  =head3 CreateIndex
650    
651  C<< $database->CreateIndex($relationName); >>  C<< $erdb->CreateIndex($relationName); >>
652    
653  Create the indexes for a relation. If a table is being loaded from a large source file (as  Create the indexes for a relation. If a table is being loaded from a large source file (as
654  is the case in L</LoadTable>), it is best to create the indexes after the load. If that is  is the case in L</LoadTable>), it is sometimes best to create the indexes after the load.
655  the case, then L</CreateTable> should be called with the index flag set to FALSE, and this  If that is the case, then L</CreateTable> should be called with the index flag set to
656  method used after the load to create the indexes for the table.  FALSE, and this method used after the load to create the indexes for the table.
657    
658  =cut  =cut
659    
# Line 423  Line 665 
665          # Get the database handle.          # Get the database handle.
666          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
667          # Now we need to create this relation's indexes. We do this by looping through its index table.          # Now we need to create this relation's indexes. We do this by looping through its index table.
668          while (my ($indexName, $indexData) = each %{$relationData->{Indexes}}) {      my $indexHash = $relationData->{Indexes};
669        for my $indexName (keys %{$indexHash}) {
670            my $indexData = $indexHash->{$indexName};
671                  # Get the index's field list.                  # Get the index's field list.
672                  my @fieldList = _FixNames(@{$indexData->{IndexFields}});                  my @fieldList = _FixNames(@{$indexData->{IndexFields}});
673                  my $flds = join(', ', @fieldList);                  my $flds = join(', ', @fieldList);
# Line 437  Line 681 
681    
682  =head3 LoadTables  =head3 LoadTables
683    
684  C<< my $stats = $database->LoadTables($directoryName, $rebuild); >>  C<< my $stats = $erdb->LoadTables($directoryName, $rebuild); >>
685    
686  This method will load the database tables from a directory. The tables must already have been created  This method will load the database tables from a directory. The tables must already have been created
687  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;  in the database. (This can be done by calling L</CreateTables>.) The caller passes in a directory name;
# Line 507  Line 751 
751    
752  =head3 GetTableNames  =head3 GetTableNames
753    
754  C<< my @names = $database->GetTableNames; >>  C<< my @names = $erdb->GetTableNames; >>
755    
756  Return a list of the relations required to implement this database.  Return a list of the relations required to implement this database.
757    
# Line 524  Line 768 
768    
769  =head3 GetEntityTypes  =head3 GetEntityTypes
770    
771  C<< my @names = $database->GetEntityTypes; >>  C<< my @names = $erdb->GetEntityTypes; >>
772    
773  Return a list of the entity type names.  Return a list of the entity type names.
774    
# Line 539  Line 783 
783          return sort keys %{$entityList};          return sort keys %{$entityList};
784  }  }
785    
786    =head3 IsEntity
787    
788    C<< my $flag = $erdb->IsEntity($entityName); >>
789    
790    Return TRUE if the parameter is an entity name, else FALSE.
791    
792    =over 4
793    
794    =item entityName
795    
796    Object name to be tested.
797    
798    =item RETURN
799    
800    Returns TRUE if the specified string is an entity name, else FALSE.
801    
802    =back
803    
804    =cut
805    
806    sub IsEntity {
807        # Get the parameters.
808        my ($self, $entityName) = @_;
809        # Test to see if it's an entity.
810        return exists $self->{_metaData}->{Entities}->{$entityName};
811    }
812    
813  =head3 Get  =head3 Get
814    
815  C<< my $query = $database->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>  C<< my $query = $erdb->Get(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
816    
817  This method returns a query object for entities of a specified type using a specified filter.  This method returns a query object for entities of a specified type using a specified filter.
818  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each  The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each
# Line 549  Line 820 
820  following call requests all B<Genome> objects for the genus specified in the variable  following call requests all B<Genome> objects for the genus specified in the variable
821  $genus.  $genus.
822    
823  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = ?", $genus); >>
824    
825  The WHERE clause contains a single question mark, so there is a single additional  The WHERE clause contains a single question mark, so there is a single additional
826  parameter representing the parameter value. It would also be possible to code  parameter representing the parameter value. It would also be possible to code
827    
828  C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>  C<< $query = $erdb->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>
829    
830  however, this version of the call would generate a syntax error if there were any quote  however, this version of the call would generate a syntax error if there were any quote
831  characters inside the variable C<$genus>.  characters inside the variable C<$genus>.
# Line 566  Line 837 
837  It is possible to specify multiple entity and relationship names in order to retrieve more than  It is possible to specify multiple entity and relationship names in order to retrieve more than
838  one object's data at the same time, which allows highly complex joined queries. For example,  one object's data at the same time, which allows highly complex joined queries. For example,
839    
840  C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>  C<< $query = $erdb->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", $genus); >>
841    
842  If multiple names are specified, then the query processor will automatically determine a  If multiple names are specified, then the query processor will automatically determine a
843  join path between the entities and relationships. The algorithm used is very simplistic.  join path between the entities and relationships. The algorithm used is very simplistic.
# Line 729  Line 1000 
1000                          $command .= " ORDER BY $orderClause";                          $command .= " ORDER BY $orderClause";
1001                  }                  }
1002          }          }
1003          Trace("SQL query: $command") if T(2);      Trace("SQL query: $command") if T(3);
1004          Trace("PARMS: '" . (join "', '", @params) . "'") if (T(3) && (@params > 0));      Trace("PARMS: '" . (join "', '", @params) . "'") if (T(4) && (@params > 0));
1005          my $sth = $dbh->prepare_command($command);          my $sth = $dbh->prepare_command($command);
1006          # Execute it with the parameters bound in.          # Execute it with the parameters bound in.
1007          $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());          $sth->execute(@params) || Confess("SELECT error" . $sth->errstr());
# Line 739  Line 1010 
1010          return $retVal;          return $retVal;
1011  }  }
1012    
1013    =head3 GetList
1014    
1015    C<< my @dbObjects = $erdb->GetList(\@objectNames, $filterClause, $param1, $param2, ..., $paramN); >>
1016    
1017    Return a list of object descriptors for the specified objects as determined by the
1018    specified filter clause.
1019    
1020    This method is essentially the same as L</Get> except it returns a list of objects rather
1021    than a query object that can be used to get the results one record at a time.
1022    
1023    =over 4
1024    
1025    =item objectNames
1026    
1027    List containing the names of the entity and relationship objects to be retrieved.
1028    
1029    =item filterClause
1030    
1031    WHERE clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can
1032    be parameterized with parameter markers (C<?>). Each field used in the WHERE clause must be
1033    specified in the standard form B<I<objectName>(I<fieldName>)>. Any parameters specified
1034    in the filter clause should be added to the parameter list as additional parameters. The
1035    fields in a filter clause can come from primary entity relations, relationship relations,
1036    or secondary entity relations; however, all of the entities and relationships involved must
1037    be included in the list of object names.
1038    
1039    The filter clause can also specify a sort order. To do this, simply follow the filter string
1040    with an ORDER BY clause. For example, the following filter string gets all genomes for a
1041    particular genus and sorts them by species name.
1042    
1043    C<< "Genome(genus) = ? ORDER BY Genome(species)" >>
1044    
1045    The rules for field references in a sort order are the same as those for field references in the
1046    filter clause in general; however, odd things may happen if a sort field is from a secondary
1047    relation.
1048    
1049    =item param1, param2, ..., paramN
1050    
1051    Parameter values to be substituted into the filter clause.
1052    
1053    =item RETURN
1054    
1055    Returns a list of B<DBObject>s that satisfy the query conditions.
1056    
1057    =back
1058    
1059    =cut
1060    #: Return Type @%
1061    sub GetList {
1062        # Get the parameters.
1063        my ($self, $objectNames, $filterClause, @params) = @_;
1064        # Declare the return variable.
1065        my @retVal = ();
1066        # Perform the query.
1067        my $query = $self->Get($objectNames, $filterClause, @params);
1068        # Loop through the results.
1069        while (my $object = $query->Fetch) {
1070            push @retVal, $object;
1071        }
1072        # Return the result.
1073        return @retVal;
1074    }
1075    
1076  =head3 ComputeObjectSentence  =head3 ComputeObjectSentence
1077    
1078  C<< my $sentence = $database->ComputeObjectSentence($objectName); >>  C<< my $sentence = $erdb->ComputeObjectSentence($objectName); >>
1079    
1080  Check an object name, and if it is a relationship convert it to a relationship sentence.  Check an object name, and if it is a relationship convert it to a relationship sentence.
1081    
# Line 776  Line 1110 
1110    
1111  =head3 DumpRelations  =head3 DumpRelations
1112    
1113  C<< $database->DumpRelations($outputDirectory); >>  C<< $erdb->DumpRelations($outputDirectory); >>
1114    
1115  Write the contents of all the relations to tab-delimited files in the specified directory.  Write the contents of all the relations to tab-delimited files in the specified directory.
1116  Each file will have the same name as the relation dumped, with an extension of DTX.  Each file will have the same name as the relation dumped, with an extension of DTX.
# Line 797  Line 1131 
1131          # Now we need to run through all the relations. First, we loop through the entities.          # Now we need to run through all the relations. First, we loop through the entities.
1132          my $metaData = $self->{_metaData};          my $metaData = $self->{_metaData};
1133          my $entities = $metaData->{Entities};          my $entities = $metaData->{Entities};
1134          while (my ($entityName, $entityStructure) = each %{$entities}) {      for my $entityName (keys %{$entities}) {
1135            my $entityStructure = $entities->{$entityName};
1136                  # Get the entity's relations.                  # Get the entity's relations.
1137                  my $relationList = $entityStructure->{Relations};                  my $relationList = $entityStructure->{Relations};
1138                  # Loop through the relations, dumping them.                  # Loop through the relations, dumping them.
1139                  while (my ($relationName, $relation) = each %{$relationList}) {          for my $relationName (keys %{$relationList}) {
1140                my $relation = $relationList->{$relationName};
1141                          $self->_DumpRelation($outputDirectory, $relationName, $relation);                          $self->_DumpRelation($outputDirectory, $relationName, $relation);
1142                  }                  }
1143          }          }
1144          # Next, we loop through the relationships.          # Next, we loop through the relationships.
1145          my $relationships = $metaData->{Relationships};          my $relationships = $metaData->{Relationships};
1146          while (my ($relationshipName, $relationshipStructure) = each %{$relationships}) {      for my $relationshipName (keys %{$relationships}) {
1147            my $relationshipStructure = $relationships->{$relationshipName};
1148                  # Dump this relationship's relation.                  # Dump this relationship's relation.
1149                  $self->_DumpRelation($outputDirectory, $relationshipName, $relationshipStructure->{Relations}->{$relationshipName});                  $self->_DumpRelation($outputDirectory, $relationshipName, $relationshipStructure->{Relations}->{$relationshipName});
1150          }          }
# Line 815  Line 1152 
1152    
1153  =head3 InsertObject  =head3 InsertObject
1154    
1155  C<< my $ok = $database->InsertObject($objectType, \%fieldHash); >>  C<< my $ok = $erdb->InsertObject($objectType, \%fieldHash); >>
1156    
1157  Insert an object into the database. The object is defined by a type name and then a hash  Insert an object into the database. The object is defined by a type name and then a hash
1158  of field names to values. Field values in the primary relation are represented by scalars.  of field names to values. Field values in the primary relation are represented by scalars.
# Line 824  Line 1161 
1161  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases  example, the following line inserts an inactive PEG feature named C<fig|188.1.peg.1> with aliases
1162  C<ZP_00210270.1> and C<gi|46206278>.  C<ZP_00210270.1> and C<gi|46206278>.
1163    
1164  C<< $database->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, 'feature-type' => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>  C<< $erdb->InsertObject('Feature', { id => 'fig|188.1.peg.1', active => 0, 'feature-type' => 'peg', alias => ['ZP_00210270.1', 'gi|46206278']}); >>
1165    
1166  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and
1167  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.
1168    
1169  C<< $database->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence => 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>  C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence => 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>
1170    
1171  =over 4  =over 4
1172    
# Line 861  Line 1198 
1198          # Loop through the relations. We'll build insert statements for each one. If a relation is          # Loop through the relations. We'll build insert statements for each one. If a relation is
1199          # secondary, we may end up generating multiple insert statements. If an error occurs, we          # secondary, we may end up generating multiple insert statements. If an error occurs, we
1200          # stop the loop.          # stop the loop.
1201          while ($retVal && (my ($relationName, $relationDefinition) = each %{$relationTable})) {      my @relationList = keys %{$relationTable};
1202        for (my $i = 0; $retVal && $i <= $#relationList; $i++) {
1203            my $relationName = $relationList[$i];
1204            my $relationDefinition = $relationTable->{$relationName};
1205                  # Get the relation's fields. For each field we will collect a value in the corresponding                  # Get the relation's fields. For each field we will collect a value in the corresponding
1206                  # position of the @valueList array. If one of the fields is missing, we will add it to the                  # position of the @valueList array. If one of the fields is missing, we will add it to the
1207                  # @missing list.                  # @missing list.
# Line 951  Line 1291 
1291    
1292  =head3 LoadTable  =head3 LoadTable
1293    
1294  C<< my %results = $database->LoadTable($fileName, $relationName, $truncateFlag); >>  C<< my %results = $erdb->LoadTable($fileName, $relationName, $truncateFlag); >>
1295    
1296  Load data from a tab-delimited file into a specified table, optionally re-creating the table first.  Load data from a tab-delimited file into a specified table, optionally re-creating the table
1297    first.
1298    
1299  =over 4  =over 4
1300    
# Line 971  Line 1312 
1312    
1313  =item RETURN  =item RETURN
1314    
1315  Returns a statistical object containing the number of records read and a list of the error messages.  Returns a statistical object containing the number of records read and a list of
1316    the error messages.
1317    
1318  =back  =back
1319    
# Line 982  Line 1324 
1324          # Create the statistical return object.          # Create the statistical return object.
1325          my $retVal = _GetLoadStats();          my $retVal = _GetLoadStats();
1326          # Trace the fact of the load.          # Trace the fact of the load.
1327          Trace("Loading table $relationName from $fileName") if T(1);      Trace("Loading table $relationName from $fileName") if T(2);
1328          # Get the database handle.          # Get the database handle.
1329          my $dbh = $self->{_dbh};          my $dbh = $self->{_dbh};
1330        # Get the input file size.
1331        my $fileSize = -s $fileName;
1332          # Get the relation data.          # Get the relation data.
1333          my $relation = $self->_FindRelation($relationName);          my $relation = $self->_FindRelation($relationName);
1334          # Check the truncation flag.          # Check the truncation flag.
1335          if ($truncateFlag) {          if ($truncateFlag) {
1336                  Trace("Creating table $relationName") if T(1);          Trace("Creating table $relationName") if T(2);
1337            # Compute the row count estimate. We take the size of the load file,
1338            # divide it by the estimated row size, and then multiply by 1.5 to
1339            # leave extra room. We postulate a minimum row count of 1000 to
1340            # prevent problems with incoming empty load files.
1341            my $rowSize = $self->EstimateRowSize($relationName);
1342            my $estimate = FIG::max($fileSize * 1.5 / $rowSize, 1000);
1343                  # Re-create the table without its index.                  # Re-create the table without its index.
1344                  $self->CreateTable($relationName, 0);          $self->CreateTable($relationName, 0, $estimate);
1345            # If this is a pre-index DBMS, create the index here.
1346            if ($dbh->{_preIndex}) {
1347                eval {
1348                    $self->CreateIndex($relationName);
1349                };
1350                if ($@) {
1351                    $retVal->AddMessage($@);
1352                }
1353            }
1354          }          }
         # Determine whether or not this is a primary relation. Primary relations have an extra  
         # field indicating whether or not a given object is new or was loaded from the flat files.  
         my $primary = $self->_IsPrimary($relationName);  
         # Get the number of fields in this relation.  
         my @fieldList = @{$relation->{Fields}};  
         my $fieldCount = @fieldList;  
         # Record the number of expected fields.  
         my $expectedFields = $fieldCount + ($primary ? 1 : 0);  
         # Start a database transaction.  
         $dbh->begin_tran;  
         # Open the relation file. We need to create a cleaned-up copy before loading.  
         open TABLEIN, '<', $fileName;  
         my $tempName = "$fileName.tbl";  
         open TABLEOUT, '>', $tempName;  
         # Loop through the file.  
         while (<TABLEIN>) {  
                 # Chop off the new-line character.  
                 my $record = $_;  
                 chomp $record;  
         # Only proceed if the record is non-blank.  
         if ($record) {  
             # Escape all the backslashes found in the line.  
             $record =~ s/\\/\\\\/g;  
             # Eliminate any trailing tabs.  
             chop $record while substr($record, -1) eq "\t";  
             # If this is a primary relation, add a 0 for the new-record flag (indicating that  
             # this record is not new, but part of the original load).  
             if ($primary) {  
                 $record .= "\t0";  
             }  
             # Write the record.  
             print TABLEOUT "$record\n";  
             # Count the record read.  
             my $count = $retVal->Add('records');  
             my $len = length $record;  
             Trace("Record $count written with $len characters.") if T(4);  
         }  
         }  
         # Close the files.  
         close TABLEIN;  
         close TABLEOUT;  
     Trace("Temporary file $tempName created.") if T(4);  
1355      # Load the table.      # Load the table.
1356          my $rv;          my $rv;
1357          eval {          eval {
1358                  $rv = $dbh->load_table(file => $tempName, tbl => $relationName);          $rv = $dbh->load_table(file => $fileName, tbl => $relationName);
1359          };          };
1360          if (!defined $rv) {          if (!defined $rv) {
1361          $retVal->AddMessage($@) if ($@);          $retVal->AddMessage($@) if ($@);
1362          $retVal->AddMessage("Table load failed for $relationName using $tempName.");          $retVal->AddMessage("Table load failed for $relationName using $fileName.");
1363                  Trace("Table load failed for $relationName.") if T(1);                  Trace("Table load failed for $relationName.") if T(1);
1364          } else {          } else {
1365                  # Here we successfully loaded the table. Trace the number of records loaded.          # Here we successfully loaded the table.
1366                  Trace("$retVal->{records} records read for $relationName.") if T(1);          $retVal->Add("tables");
1367            my $size = -s $fileName;
1368            Trace("$size bytes loaded into $relationName.") if T(2);
1369                  # If we're rebuilding, we need to create the table indexes.                  # If we're rebuilding, we need to create the table indexes.
1370                  if ($truncateFlag) {          if ($truncateFlag && ! $dbh->{_preIndex}) {
1371                          eval {                          eval {
1372                                  $self->CreateIndex($relationName);                                  $self->CreateIndex($relationName);
1373                          };                          };
# Line 1059  Line 1378 
1378          }          }
1379          # Commit the database changes.          # Commit the database changes.
1380          $dbh->commit_tran;          $dbh->commit_tran;
1381          # Delete the temporary file.      # Analyze the table to improve performance.
1382          unlink $tempName;      $dbh->vacuum_it($relationName);
1383          # Return the statistics.          # Return the statistics.
1384          return $retVal;          return $retVal;
1385  }  }
1386    
1387  =head3 GenerateEntity  =head3 GenerateEntity
1388    
1389  C<< my $fieldHash = $database->GenerateEntity($id, $type, \%values); >>  C<< my $fieldHash = $erdb->GenerateEntity($id, $type, \%values); >>
1390    
1391  Generate the data for a new entity instance. This method creates a field hash suitable for  Generate the data for a new entity instance. This method creates a field hash suitable for
1392  passing as a parameter to L</InsertObject>. The ID is specified by the caller, but the rest  passing as a parameter to L</InsertObject>. The ID is specified by the caller, but the rest
# Line 1123  Line 1442 
1442          return $this;          return $this;
1443  }  }
1444    
1445    =head3 GetEntity
1446    
1447    C<< my $entityObject = $erdb->GetEntity($entityType, $ID); >>
1448    
1449    Return an object describing the entity instance with a specified ID.
1450    
1451    =over 4
1452    
1453    =item entityType
1454    
1455    Entity type name.
1456    
1457    =item ID
1458    
1459    ID of the desired entity.
1460    
1461    =item RETURN
1462    
1463    Returns a B<DBObject> representing the desired entity instance, or an undefined value if no
1464    instance is found with the specified key.
1465    
1466    =back
1467    
1468    =cut
1469    
1470    sub GetEntity {
1471        # Get the parameters.
1472        my ($self, $entityType, $ID) = @_;
1473        # Create a query.
1474        my $query = $self->Get([$entityType], "$entityType(id) = ?", $ID);
1475        # Get the first (and only) object.
1476        my $retVal = $query->Fetch();
1477        # Return the result.
1478        return $retVal;
1479    }
1480    
1481    =head3 GetEntityValues
1482    
1483    C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >>
1484    
1485    Return a list of values from a specified entity instance.
1486    
1487    =over 4
1488    
1489    =item entityType
1490    
1491    Entity type name.
1492    
1493    =item ID
1494    
1495    ID of the desired entity.
1496    
1497    =item fields
1498    
1499    List of field names, each of the form I<objectName>C<(>I<fieldName>C<)>.
1500    
1501    =item RETURN
1502    
1503    Returns a flattened list of the values of the specified fields for the specified entity.
1504    
1505    =back
1506    
1507    =cut
1508    
1509    sub GetEntityValues {
1510        # Get the parameters.
1511        my ($self, $entityType, $ID, $fields) = @_;
1512        # Get the specified entity.
1513        my $entity = $self->GetEntity($entityType, $ID);
1514        # Declare the return list.
1515        my @retVal = ();
1516        # If we found the entity, push the values into the return list.
1517        if ($entity) {
1518            push @retVal, $entity->Values($fields);
1519        }
1520        # Return the result.
1521        return @retVal;
1522    }
1523    
1524    =head3 GetAll
1525    
1526    C<< my @list = $erdb->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>
1527    
1528    Return a list of values taken from the objects returned by a query. The first three
1529    parameters correspond to the parameters of the L</Get> method. The fourth parameter is
1530    a list of the fields desired from each record found by the query. The field name
1531    syntax is the standard syntax used for fields in the B<ERDB> system--
1532    B<I<objectName>(I<fieldName>)>-- where I<objectName> is the name of the relevant entity
1533    or relationship and I<fieldName> is the name of the field.
1534    
1535    The list returned will be a list of lists. Each element of the list will contain
1536    the values returned for the fields specified in the fourth parameter. If one of the
1537    fields specified returns multiple values, they are flattened in with the rest. For
1538    example, the following call will return a list of the features in a particular
1539    spreadsheet cell, and each feature will be represented by a list containing the
1540    feature ID followed by all of its aliases.
1541    
1542    C<< my @list = $erdb->GetAll(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>
1543    
1544    =over 4
1545    
1546    =item objectNames
1547    
1548    List containing the names of the entity and relationship objects to be retrieved.
1549    
1550    =item filterClause
1551    
1552    WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can
1553    be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form
1554    B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the
1555    parameter list as additional parameters. The fields in a filter clause can come from primary
1556    entity relations, relationship relations, or secondary entity relations; however, all of the
1557    entities and relationships involved must be included in the list of object names.
1558    
1559    =item parameterList
1560    
1561    List of the parameters to be substituted in for the parameter marks in the filter clause.
1562    
1563    =item fields
1564    
1565    List of the fields to be returned in each element of the list returned.
1566    
1567    =item count
1568    
1569    Maximum number of records to return. If omitted or 0, all available records will be returned.
1570    
1571    =item RETURN
1572    
1573    Returns a list of list references. Each element of the return list contains the values for the
1574    fields specified in the B<fields> parameter.
1575    
1576    =back
1577    
1578    =cut
1579    #: Return Type @@;
1580    sub GetAll {
1581        # Get the parameters.
1582        my ($self, $objectNames, $filterClause, $parameterList, $fields, $count) = @_;
1583        # Translate the parameters from a list reference to a list. If the parameter
1584        # list is a scalar we convert it into a singleton list.
1585        my @parmList = ();
1586        if (ref $parameterList eq "ARRAY") {
1587            @parmList = @{$parameterList};
1588        } else {
1589            push @parmList, $parameterList;
1590        }
1591        # Create the query.
1592        my $query = $self->Get($objectNames, $filterClause, @parmList);
1593        # Set up a counter of the number of records read.
1594        my $fetched = 0;
1595        # Insure the counter has a value.
1596        if (!defined $count) {
1597            $count = 0;
1598        }
1599        # Loop through the records returned, extracting the fields. Note that if the
1600        # counter is non-zero, we stop when the number of records read hits the count.
1601        my @retVal = ();
1602        while (($count == 0 || $fetched < $count) && (my $row = $query->Fetch())) {
1603            my @rowData = $row->Values($fields);
1604            push @retVal, \@rowData;
1605            $fetched++;
1606        }
1607        # Return the resulting list.
1608        return @retVal;
1609    }
1610    
1611    =head3 EstimateRowSize
1612    
1613    C<< my $rowSize = $erdb->EstimateRowSize($relName); >>
1614    
1615    Estimate the row size of the specified relation. The estimated row size is computed by adding
1616    up the average length for each data type.
1617    
1618    =over 4
1619    
1620    =item relName
1621    
1622    Name of the relation whose estimated row size is desired.
1623    
1624    =item RETURN
1625    
1626    Returns an estimate of the row size for the specified relation.
1627    
1628    =back
1629    
1630    =cut
1631    #: Return Type $;
1632    sub EstimateRowSize {
1633        # Get the parameters.
1634        my ($self, $relName) = @_;
1635        # Declare the return variable.
1636        my $retVal = 0;
1637        # Find the relation descriptor.
1638        my $relation = $self->_FindRelation($relName);
1639        # Get the list of fields.
1640        for my $fieldData (@{$relation->{Fields}}) {
1641            # Get the field type and add its length.
1642            my $fieldLen = $TypeTable{$fieldData->{type}}->{avgLen};
1643            $retVal += $fieldLen;
1644        }
1645        # Return the result.
1646        return $retVal;
1647    }
1648    
1649  =head2 Internal Utility Methods  =head2 Internal Utility Methods
1650    
# Line 1499  Line 2021 
2021  sub _LoadMetaData {  sub _LoadMetaData {
2022          # Get the parameters.          # Get the parameters.
2023          my ($filename) = @_;          my ($filename) = @_;
2024        Trace("Reading Sprout DBD from $filename.") if T(2);
2025          # Slurp the XML file into a variable. Extensive use of options is used to insure we          # Slurp the XML file into a variable. Extensive use of options is used to insure we
2026          # get the exact structure we want.          # get the exact structure we want.
2027          my $metadata = XML::Simple::XMLin($filename,          my $metadata = XML::Simple::XMLin($filename,
# Line 1523  Line 2046 
2046          my %masterRelationTable = ();          my %masterRelationTable = ();
2047          # Loop through the entities.          # Loop through the entities.
2048          my $entityList = $metadata->{Entities};          my $entityList = $metadata->{Entities};
2049          while (my ($entityName, $entityStructure) = each %{$entityList}) {      for my $entityName (keys %{$entityList}) {
2050            my $entityStructure = $entityList->{$entityName};
2051                  #                  #
2052                  # The first step is to run creating all the entity's default values. For C<Field> elements,          # The first step is to create all the entity's default values. For C<Field> elements,
2053                  # the relation name must be added where it is not specified. For relationships,                  # the relation name must be added where it is not specified. For relationships,
2054                  # the B<from-link> and B<to-link> fields must be inserted, and for entities an B<id>                  # the B<from-link> and B<to-link> fields must be inserted, and for entities an B<id>
2055                  # field must be added to each relation. Finally, each field will have a C<PrettySort> attribute                  # field must be added to each relation. Finally, each field will have a C<PrettySort> attribute
# Line 1571  Line 2095 
2095                  # to a list of fields. First, we need the ID field itself.                  # to a list of fields. First, we need the ID field itself.
2096                  my $idField = $fieldList->{id};                  my $idField = $fieldList->{id};
2097                  # Loop through the relations.                  # Loop through the relations.
2098                  while (my ($relationName, $relation) = each %{$relationTable}) {          for my $relationName (keys %{$relationTable}) {
2099                my $relation = $relationTable->{$relationName};
2100                          # Get the relation's field list.                          # Get the relation's field list.
2101                          my $relationFieldList = $relation->{Fields};                          my $relationFieldList = $relation->{Fields};
2102                          # Add the ID field to it. If the field's already there, it will not make any                          # Add the ID field to it. If the field's already there, it will not make any
# Line 1621  Line 2146 
2146                  # The next step is to insure that each relation has at least one index that begins with the ID field.                  # The next step is to insure that each relation has at least one index that begins with the ID field.
2147                  # After that, we convert each relation's index list to an index table. We first need to loop through                  # After that, we convert each relation's index list to an index table. We first need to loop through
2148                  # the relations.                  # the relations.
2149                  while (my ($relationName, $relation) = each %{$relationTable}) {          for my $relationName (keys %{$relationTable}) {
2150                my $relation = $relationTable->{$relationName};
2151                          # Get the relation's index list.                          # Get the relation's index list.
2152                          my $indexList = $relation->{Indexes};                          my $indexList = $relation->{Indexes};
2153                          # Insure this relation has an ID index.                          # Insure this relation has an ID index.
# Line 1652  Line 2178 
2178          # Loop through the relationships. Relationships actually turn out to be much simpler than entities.          # Loop through the relationships. Relationships actually turn out to be much simpler than entities.
2179          # For one thing, there is only a single constituent relation.          # For one thing, there is only a single constituent relation.
2180          my $relationshipList = $metadata->{Relationships};          my $relationshipList = $metadata->{Relationships};
2181          while (my ($relationshipName, $relationshipStructure) = each %{$relationshipList}) {      for my $relationshipName (keys %{$relationshipList}) {
2182            my $relationshipStructure = $relationshipList->{$relationshipName};
2183                  # Fix up this relationship.                  # Fix up this relationship.
2184                  _FixupFields($relationshipStructure, $relationshipName, 2, 3);                  _FixupFields($relationshipStructure, $relationshipName, 2, 3);
2185                  # Format a description for the FROM field.                  # Format a description for the FROM field.
# Line 1701  Line 2228 
2228                  my @fromList = ();                  my @fromList = ();
2229                  my @toList = ();                  my @toList = ();
2230                  my @bothList = ();                  my @bothList = ();
2231                  while (my ($relationshipName, $relationship) = each %{$relationshipList}) {          Trace("Join table build for $entityName.") if T(metadata => 4);
2232            for my $relationshipName (keys %{$relationshipList}) {
2233                my $relationship = $relationshipList->{$relationshipName};
2234                          # Determine if this relationship has our entity in one of its link fields.                          # Determine if this relationship has our entity in one of its link fields.
2235                          if ($relationship->{from} eq $entityName) {              my $fromEntity = $relationship->{from};
2236                                  if ($relationship->{to} eq $entityName) {              my $toEntity = $relationship->{to};
2237                Trace("Join check for relationship $relationshipName from $fromEntity to $toEntity.") if T(4);
2238                if ($fromEntity eq $entityName) {
2239                    if ($toEntity eq $entityName) {
2240                                          # Here the relationship is recursive.                                          # Here the relationship is recursive.
2241                                          push @bothList, $relationshipName;                                          push @bothList, $relationshipName;
2242                        Trace("Relationship $relationshipName put in both-list.") if T(metadata => 4);
2243                                  } else {                                  } else {
2244                                          # Here the relationship comes from the entity.                                          # Here the relationship comes from the entity.
2245                                          push @fromList, $relationshipName;                                          push @fromList, $relationshipName;
2246                        Trace("Relationship $relationshipName put in from-list.") if T(metadata => 4);
2247                                  }                                  }
2248                          } elsif ($relationship->{to} eq $entityName) {              } elsif ($toEntity eq $entityName) {
2249                                  # Here the relationship goes to the entity.                                  # Here the relationship goes to the entity.
2250                                  push @toList, $relationshipName;                                  push @toList, $relationshipName;
2251                    Trace("Relationship $relationshipName put in to-list.") if T(metadata => 4);
2252                          }                          }
2253                  }                  }
2254                  # Create the nonrecursive joins. Note that we build two hashes for running                  # Create the nonrecursive joins. Note that we build two hashes for running
# Line 1722  Line 2257 
2257                  # hash table at the same time.                  # hash table at the same time.
2258                  my %directRelationships = ( from => \@fromList, to => \@toList );                  my %directRelationships = ( from => \@fromList, to => \@toList );
2259                  my %otherRelationships = ( from => \@fromList, to => \@toList );                  my %otherRelationships = ( from => \@fromList, to => \@toList );
2260                  while (my ($linkType, $relationships) = each %directRelationships) {          for my $linkType (keys %directRelationships) {
2261                my $relationships = $directRelationships{$linkType};
2262                          # Loop through all the relationships.                          # Loop through all the relationships.
2263                          for my $relationshipName (@{$relationships}) {                          for my $relationshipName (@{$relationships}) {
2264                                  # Create joins between the entity and this relationship.                                  # Create joins between the entity and this relationship.
2265                                  my $linkField = "$relationshipName.${linkType}_link";                                  my $linkField = "$relationshipName.${linkType}_link";
2266                                  my $joinClause = "$entityName.id = $linkField";                                  my $joinClause = "$entityName.id = $linkField";
2267                    Trace("Entity join clause is $joinClause for $entityName and $relationshipName.") if T(metadata => 4);
2268                                  $joinTable{"$entityName/$relationshipName"} = $joinClause;                                  $joinTable{"$entityName/$relationshipName"} = $joinClause;
2269                                  $joinTable{"$relationshipName/$entityName"} = $joinClause;                                  $joinTable{"$relationshipName/$entityName"} = $joinClause;
2270                                  # Create joins between this relationship and the other relationships.                                  # Create joins between this relationship and the other relationships.
2271                                  while (my ($otherType, $otherships) = each %otherRelationships) {                  for my $otherType (keys %otherRelationships) {
2272                        my $otherships = $otherRelationships{$otherType};
2273                                          for my $otherName (@{$otherships}) {                                          for my $otherName (@{$otherships}) {
2274                                                  # Get the key for this join.                                                  # Get the key for this join.
2275                                                  my $joinKey = "$otherName/$relationshipName";                                                  my $joinKey = "$otherName/$relationshipName";
# Line 1741  Line 2279 
2279                                                          # path is ambiguous. We delete the join from the join                                                          # path is ambiguous. We delete the join from the join
2280                                                          # table to prevent it from being used.                                                          # table to prevent it from being used.
2281                                                          delete $joinTable{$joinKey};                                                          delete $joinTable{$joinKey};
2282                                Trace("Deleting ambiguous join $joinKey.") if T(4);
2283                                                  } elsif ($otherName ne $relationshipName) {                                                  } elsif ($otherName ne $relationshipName) {
2284                                                          # Here we have a valid join. Note that joins between a                                                          # Here we have a valid join. Note that joins between a
2285                                                          # relationship and itself are prohibited.                                                          # relationship and itself are prohibited.
2286                                                          $joinTable{$joinKey} = "$otherName.${otherType}_link = $linkField";                              my $relJoinClause = "$otherName.${otherType}_link = $linkField";
2287                                $joinTable{$joinKey} = $relJoinClause;
2288                                Trace("Relationship join clause is $relJoinClause for $joinKey.") if T(metadata => 4);
2289                                                  }                                                  }
2290                                          }                                          }
2291                                  }                                  }
# Line 1753  Line 2294 
2294                                  # relationship can only be ambiguous with another recursive relationship,                                  # relationship can only be ambiguous with another recursive relationship,
2295                                  # and the incoming relationship from the outer loop is never recursive.                                  # and the incoming relationship from the outer loop is never recursive.
2296                                  for my $otherName (@bothList) {                                  for my $otherName (@bothList) {
2297                        Trace("Setting up relationship joins to recursive relationship $otherName with $relationshipName.") if T(metadata => 4);
2298                                          # Join from the left.                                          # Join from the left.
2299                                          $joinTable{"$relationshipName/$otherName"} =                                          $joinTable{"$relationshipName/$otherName"} =
2300                                                  "$linkField = $otherName.from_link";                                                  "$linkField = $otherName.from_link";
# Line 1767  Line 2309 
2309                  # rise to situations where we can't create the path we want; however, it is always                  # rise to situations where we can't create the path we want; however, it is always
2310                  # possible to get the same effect using multiple queries.                  # possible to get the same effect using multiple queries.
2311                  for my $relationshipName (@bothList) {                  for my $relationshipName (@bothList) {
2312                Trace("Setting up entity joins to recursive relationship $relationshipName with $entityName.") if T(metadata => 4);
2313                          # Join to the entity from each direction.                          # Join to the entity from each direction.
2314                          $joinTable{"$entityName/$relationshipName"} =                          $joinTable{"$entityName/$relationshipName"} =
2315                                  "$entityName.id = $relationshipName.from_link";                                  "$entityName.id = $relationshipName.from_link";
# Line 1817  Line 2360 
2360          # index descriptor does not exist, it will be created automatically so we can add          # index descriptor does not exist, it will be created automatically so we can add
2361          # the field to it.          # the field to it.
2362          unshift @{$newIndex->{IndexFields}}, $firstField;          unshift @{$newIndex->{IndexFields}}, $firstField;
2363        # If this is a one-to-many relationship, the "To" index is unique.
2364        if ($relationshipStructure->{arity} eq "1M" && $indexKey eq "To") {
2365            $newIndex->{Unique} = 'true';
2366        }
2367          # Add the index to the relation.          # Add the index to the relation.
2368          _AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex);          _AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex);
2369  }  }
# Line 1906  Line 2453 
2453                  $structure->{Fields} = { };                  $structure->{Fields} = { };
2454          } else {          } else {
2455                  # Here we have a field list. Loop through its fields.                  # Here we have a field list. Loop through its fields.
2456                  while (my ($fieldName, $fieldData) = each %{$structure->{Fields}}) {          my $fieldStructures = $structure->{Fields};
2457            for my $fieldName (keys %{$fieldStructures}) {
2458                Trace("Processing field $fieldName of $defaultRelationName.") if T(4);
2459                my $fieldData = $fieldStructures->{$fieldName};
2460                          # Get the field type.                          # Get the field type.
2461                          my $type = $fieldData->{type};                          my $type = $fieldData->{type};
2462                          # Plug in a relation name if it is needed.                          # Plug in a relation name if it is needed.

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.22

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3