11 |
use Time::HiRes qw(gettimeofday); |
use Time::HiRes qw(gettimeofday); |
12 |
use Digest::MD5 qw(md5_base64); |
use Digest::MD5 qw(md5_base64); |
13 |
use FIG; |
use FIG; |
14 |
|
use CGI; |
15 |
|
|
16 |
=head1 Entity-Relationship Database Package |
=head1 Entity-Relationship Database Package |
17 |
|
|
60 |
B<start-position>, which indicates where in the contig that the sequence begins. This attribute |
B<start-position>, which indicates where in the contig that the sequence begins. This attribute |
61 |
is implemented as the C<start_position> field in the C<IsMadeUpOf> relation. |
is implemented as the C<start_position> field in the C<IsMadeUpOf> relation. |
62 |
|
|
63 |
The database itself is described by an XML file using the F<ERDatabase.xsd> schema. In addition to |
The database itself is described by an XML file. In addition to all the data required to define |
64 |
all the data required to define the entities, relationships, and attributes, the schema provides |
the entities, relationships, and attributes, the schema provides space for notes describing |
65 |
space for notes describing the data and what it means. These notes are used by L</ShowMetaData> |
the data and what it means. These notes are used by L</ShowMetaData> to generate documentation |
66 |
to generate documentation for the database. |
for the database. |
67 |
|
|
68 |
|
Special support is provided for text searching. An entity field can be marked as <em>searchable</em>, |
69 |
|
in which case it will be used to generate a text search index in which the user searches for words |
70 |
|
in the field instead of a particular field value. |
71 |
|
|
72 |
Finally, every entity and relationship object has a flag indicating if it is new or old. The object |
Finally, every entity and relationship object has a flag indicating if it is new or old. The object |
73 |
is considered I<old> if it was loaded by the L</LoadTables> method. It is considered I<new> if it |
is considered I<old> if it was loaded by the L</LoadTables> method. It is considered I<new> if it |
74 |
was inserted by the L</InsertObject> method. |
was inserted by the L</InsertObject> method. |
75 |
|
|
|
To facilitate testing, the ERDB module supports automatic generation of test data. This process |
|
|
is described in the L</GenerateEntity> and L</GenerateConnection> methods, though it is not yet |
|
|
fully implemented. |
|
|
|
|
76 |
=head2 XML Database Description |
=head2 XML Database Description |
77 |
|
|
78 |
=head3 Data Types |
=head3 Data Types |
218 |
index will be created for each relation with at least one searchable field in it. |
index will be created for each relation with at least one searchable field in it. |
219 |
For best results, this option should only be used for string or text fields. |
For best results, this option should only be used for string or text fields. |
220 |
|
|
221 |
|
=item special |
222 |
|
|
223 |
|
This attribute allows the subclass to assign special meaning for certain fields. |
224 |
|
The interpretation is up to the subclass itself. Currently, only entity fields |
225 |
|
can have this attribute. |
226 |
|
|
227 |
=back |
=back |
228 |
|
|
229 |
=head3 Indexes |
=head3 Indexes |
230 |
|
|
231 |
An entity can have multiple alternate indexes associated with it. The fields must |
An entity can have multiple alternate indexes associated with it. The fields must |
232 |
be from the primary relation. The alternate indexes assist in ordering results |
all be from the same relation. The alternate indexes assist in ordering results |
233 |
from a query. A relationship can have up to two indexes-- a I<to-index> and a |
from a query. A relationship can have up to two indexes-- a I<to-index> and a |
234 |
I<from-index>. These order the results when crossing the relationship. For |
I<from-index>. These order the results when crossing the relationship. For |
235 |
example, in the relationship C<HasContig> from C<Genome> to C<Contig>, the |
example, in the relationship C<HasContig> from C<Genome> to C<Contig>, the |
335 |
|
|
336 |
# Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string. |
# Table of information about our datatypes. "sqlType" is the corresponding SQL datatype string. |
337 |
# "maxLen" is the maximum permissible length of the incoming string data used to populate a field |
# "maxLen" is the maximum permissible length of the incoming string data used to populate a field |
338 |
# of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation |
# of the specified type. "avgLen" is the average byte length for estimating |
339 |
# string is specified in the field definition. "avgLen" is the average byte length for estimating |
# record sizes. "sort" is the key modifier for the sort command, "notes" is a type description, |
340 |
# record sizes. "sort" is the key modifier for the sort command. |
# and "indexMod", if non-zero, is the number of characters to use when the field is specified in an |
341 |
my %TypeTable = ( char => { sqlType => 'CHAR(1)', maxLen => 1, avgLen => 1, sort => "", dataGen => "StringGen('A')" }, |
# index |
342 |
int => { sqlType => 'INTEGER', maxLen => 20, avgLen => 4, sort => "n", dataGen => "IntGen(0, 99999999)" }, |
my %TypeTable = ( char => { sqlType => 'CHAR(1)', maxLen => 1, avgLen => 1, sort => "", |
343 |
counter => { sqlType => 'INTEGER UNSIGNED', maxLen => 20, avgLen => 4, sort => "n", dataGen => "IntGen(0, 99999999)" }, |
indexMod => 0, notes => "single ASCII character"}, |
344 |
string => { sqlType => 'VARCHAR(255)', maxLen => 255, avgLen => 100, sort => "", dataGen => "StringGen(IntGen(10,250))" }, |
int => { sqlType => 'INTEGER', maxLen => 20, avgLen => 4, sort => "n", |
345 |
text => { sqlType => 'TEXT', maxLen => 1000000000, avgLen => 500, sort => "", dataGen => "StringGen(IntGen(80,1000))" }, |
indexMod => 0, notes => "signed 32-bit integer"}, |
346 |
date => { sqlType => 'BIGINT', maxLen => 80, avgLen => 8, sort => "n", dataGen => "DateGen(-7, 7, IntGen(0,1400))" }, |
counter => { sqlType => 'INTEGER UNSIGNED', maxLen => 20, avgLen => 4, sort => "n", |
347 |
float => { sqlType => 'DOUBLE PRECISION', maxLen => 40, avgLen => 8, sort => "g", dataGen => "FloatGen(0.0, 100.0)" }, |
indexMod => 0, notes => "unsigned 32-bit integer"}, |
348 |
boolean => { sqlType => 'SMALLINT', maxLen => 1, avgLen => 1, sort => "n", dataGen => "IntGen(0, 1)" }, |
string => { sqlType => 'VARCHAR(255)', maxLen => 255, avgLen => 100, sort => "", |
349 |
|
indexMod => 0, notes => "character string, 0 to 255 characters"}, |
350 |
|
text => { sqlType => 'TEXT', maxLen => 1000000000, avgLen => 500, sort => "", |
351 |
|
indexMod => 255, notes => "character string, nearly unlimited length, only first 255 characters are indexed"}, |
352 |
|
date => { sqlType => 'BIGINT', maxLen => 80, avgLen => 8, sort => "n", |
353 |
|
indexMod => 0, notes => "signed, 64-bit integer"}, |
354 |
|
float => { sqlType => 'DOUBLE PRECISION', maxLen => 40, avgLen => 8, sort => "g", |
355 |
|
indexMod => 0, notes => "64-bit double precision floating-point number"}, |
356 |
|
boolean => { sqlType => 'SMALLINT', maxLen => 1, avgLen => 1, sort => "n", |
357 |
|
indexMod => 0, notes => "boolean value: 0 if false, 1 if true"}, |
358 |
'hash-string' => |
'hash-string' => |
359 |
{ sqlType => 'VARCHAR(22)', maxLen => 22, avgLen => 22, sort => "", dataGen => "SringGen(22)" }, |
{ sqlType => 'VARCHAR(22)', maxLen => 22, avgLen => 22, sort => "", |
360 |
|
indexMod => 0, notes => "string stored in digested form, used for certain types of key fields"}, |
361 |
'id-string' => |
'id-string' => |
362 |
{ sqlType => 'VARCHAR(25)', maxLen => 25, avgLen => 25, sort => "", dataGen => "SringGen(22)" }, |
{ sqlType => 'VARCHAR(25)', maxLen => 25, avgLen => 25, sort => "", |
363 |
|
indexMod => 0, notes => "character string, 0 to 25 characters"}, |
364 |
'key-string' => |
'key-string' => |
365 |
{ sqlType => 'VARCHAR(40)', maxLen => 40, avgLen => 10, sort => "", dataGen => "StringGen(IntGen(10,40))" }, |
{ sqlType => 'VARCHAR(40)', maxLen => 40, avgLen => 10, sort => "", |
366 |
|
indexMod => 0, notes => "character string, 0 to 40 characters"}, |
367 |
'name-string' => |
'name-string' => |
368 |
{ sqlType => 'VARCHAR(80)', maxLen => 80, avgLen => 40, sort => "", dataGen => "StringGen(IntGen(10,80))" }, |
{ sqlType => 'VARCHAR(80)', maxLen => 80, avgLen => 40, sort => "", |
369 |
|
indexMod => 0, notes => "character string, 0 to 80 characters"}, |
370 |
'medium-string' => |
'medium-string' => |
371 |
{ sqlType => 'VARCHAR(160)', maxLen => 160, avgLen => 40, sort => "", dataGen => "StringGen(IntGen(10,160))" }, |
{ sqlType => 'VARCHAR(160)', maxLen => 160, avgLen => 40, sort => "", |
372 |
|
indexMod => 0, notes => "character string, 0 to 160 characters"}, |
373 |
); |
); |
374 |
|
|
375 |
# Table translating arities into natural language. |
# Table translating arities into natural language. |
378 |
'MM' => 'many-to-many' |
'MM' => 'many-to-many' |
379 |
); |
); |
380 |
|
|
381 |
# Table for interpreting string patterns. |
# Options for XML input and output. |
382 |
|
|
383 |
|
my %XmlOptions = (GroupTags => { Relationships => 'Relationship', |
384 |
|
Entities => 'Entity', |
385 |
|
Fields => 'Field', |
386 |
|
Indexes => 'Index', |
387 |
|
IndexFields => 'IndexField' |
388 |
|
}, |
389 |
|
KeyAttr => { Relationship => 'name', |
390 |
|
Entity => 'name', |
391 |
|
Field => 'name' |
392 |
|
}, |
393 |
|
SuppressEmpty => 1, |
394 |
|
); |
395 |
|
|
396 |
my %PictureTable = ( 'A' => "abcdefghijklmnopqrstuvwxyz", |
my %XmlInOpts = ( |
397 |
'9' => "0123456789", |
ForceArray => ['Field', 'Index', 'IndexField'], |
398 |
'X' => "abcdefghijklmnopqrstuvwxyz0123456789", |
ForceContent => 1, |
399 |
'V' => "aeiou", |
NormalizeSpace => 2, |
|
'K' => "bcdfghjklmnoprstvwxyz" |
|
400 |
); |
); |
401 |
|
my %XmlOutOpts = ( |
402 |
|
RootName => 'Database', |
403 |
|
XMLDecl => 1, |
404 |
|
); |
405 |
|
|
406 |
|
|
407 |
=head2 Public Methods |
=head2 Public Methods |
408 |
|
|
544 |
my $entityData = $entityList->{$key}; |
my $entityData = $entityList->{$key}; |
545 |
# If there's descriptive text, display it. |
# If there's descriptive text, display it. |
546 |
if (my $notes = $entityData->{Notes}) { |
if (my $notes = $entityData->{Notes}) { |
547 |
$retVal .= "<p>" . _HTMLNote($notes->{content}) . "</p>\n"; |
$retVal .= "<p>" . HTMLNote($notes->{content}) . "</p>\n"; |
548 |
} |
} |
549 |
# Now we want a list of the entity's relationships. First, we set up the relationship subsection. |
# Now we want a list of the entity's relationships. First, we set up the relationship subsection. |
550 |
$retVal .= "<h4>Relationships for <b>$key</b></h4>\n<ul>\n"; |
$retVal .= "<h4>Relationships for <b>$key</b></h4>\n<ul>\n"; |
601 |
$retVal .= "</p>\n"; |
$retVal .= "</p>\n"; |
602 |
# If there are notes on this relationship, display them. |
# If there are notes on this relationship, display them. |
603 |
if (my $notes = $relationshipStructure->{Notes}) { |
if (my $notes = $relationshipStructure->{Notes}) { |
604 |
$retVal .= "<p>" . _HTMLNote($notes->{content}) . "</p>\n"; |
$retVal .= "<p>" . HTMLNote($notes->{content}) . "</p>\n"; |
605 |
} |
} |
606 |
# Generate the relationship's relation table. |
# Generate the relationship's relation table. |
607 |
my $htmlString = _ShowRelationTable($key, $relationshipStructure->{Relations}->{$key}); |
my $htmlString = _ShowRelationTable($key, $relationshipStructure->{Relations}->{$key}); |
648 |
return Data::Dumper::Dumper($self->{_metaData}); |
return Data::Dumper::Dumper($self->{_metaData}); |
649 |
} |
} |
650 |
|
|
651 |
|
=head3 FindIndexForEntity |
652 |
|
|
653 |
|
C<< my $indexFound = ERDB::FindIndexForEntity($xml, $entityName, $attributeName); >> |
654 |
|
|
655 |
|
This method locates the entry in an entity's index list that begins with the |
656 |
|
specified attribute name. If the entity has no index list, one will be |
657 |
|
created. This method works on raw XML, not a live ERDB object. |
658 |
|
|
659 |
|
=over 4 |
660 |
|
|
661 |
|
=item xml |
662 |
|
|
663 |
|
The raw XML structure defining the database. |
664 |
|
|
665 |
|
=item entityName |
666 |
|
|
667 |
|
The name of the relevant entity. |
668 |
|
|
669 |
|
=item attributeName |
670 |
|
|
671 |
|
The name of the attribute relevant to the search. |
672 |
|
|
673 |
|
=item RETURN |
674 |
|
|
675 |
|
The numerical index in the index list of the index entry for the specified entity and |
676 |
|
attribute, or C<undef> if no such index exists. |
677 |
|
|
678 |
|
=back |
679 |
|
|
680 |
|
=cut |
681 |
|
|
682 |
|
sub FindIndexForEntity { |
683 |
|
# Get the parameters. |
684 |
|
my ($xml, $entityName, $attributeName) = @_; |
685 |
|
# Declare the return variable. |
686 |
|
my $retVal; |
687 |
|
# Get the named entity. |
688 |
|
my $entityData = $xml->{Entities}->{$entityName}; |
689 |
|
if (! $entityData) { |
690 |
|
Confess("Entity $entityName not found in DBD structure."); |
691 |
|
} else { |
692 |
|
# Insure it has an index list. |
693 |
|
if (! exists $entityData->{Indexes}) { |
694 |
|
$entityData->{Indexes} = []; |
695 |
|
} else { |
696 |
|
# Search for the desired index. |
697 |
|
my $indexList = $entityData->{Indexes}; |
698 |
|
my $n = scalar @{$indexList}; |
699 |
|
Trace("Searching $n indexes in index list for $entityName.") if T(2); |
700 |
|
# We use an indexed FOR here because we're returning an |
701 |
|
# index number instead of an object. We do THAT so we can |
702 |
|
# delete the index from the list if needed. |
703 |
|
for (my $i = 0; $i < $n && !defined($retVal); $i++) { |
704 |
|
my $index = $indexList->[$i]; |
705 |
|
my $fields = $index->{IndexFields}; |
706 |
|
# Technically this IF should be safe (that is, we are guaranteed |
707 |
|
# the existence of a "$fields->[0]"), because when we load the XML |
708 |
|
# we have SuppressEmpty specified. |
709 |
|
if ($fields->[0]->{name} eq $attributeName) { |
710 |
|
$retVal = $i; |
711 |
|
} |
712 |
|
} |
713 |
|
} |
714 |
|
} |
715 |
|
Trace("Index for $attributeName of $entityName found at position $retVal.") if defined($retVal) && T(3); |
716 |
|
Trace("Index for $attributeName not found in $entityName.") if !defined($retVal) && T(3); |
717 |
|
# Return the result. |
718 |
|
return $retVal; |
719 |
|
} |
720 |
|
|
721 |
=head3 CreateTables |
=head3 CreateTables |
722 |
|
|
723 |
C<< $erdb->CreateTables(); >> |
C<< $erdb->CreateTables(); >> |
963 |
for my $indexName (keys %{$indexHash}) { |
for my $indexName (keys %{$indexHash}) { |
964 |
my $indexData = $indexHash->{$indexName}; |
my $indexData = $indexHash->{$indexName}; |
965 |
# Get the index's field list. |
# Get the index's field list. |
966 |
my @fieldList = _FixNames(@{$indexData->{IndexFields}}); |
my @rawFields = @{$indexData->{IndexFields}}; |
967 |
|
# Get a hash of the relation's field types. |
968 |
|
my %types = map { $_->{name} => $_->{type} } @{$relationData->{Fields}}; |
969 |
|
# We need to check for text fields. We need a append a length limitation for them. To do |
970 |
|
# that, we need the relation's field list. |
971 |
|
my $relFields = $relationData->{Fields}; |
972 |
|
for (my $i = 0; $i <= $#rawFields; $i++) { |
973 |
|
# Get the field type. |
974 |
|
my $field = $rawFields[$i]; |
975 |
|
my $type = $types{$field}; |
976 |
|
# Ask if it requires using prefix notation for the index. |
977 |
|
my $mod = $TypeTable{$type}->{indexMod}; |
978 |
|
Trace("Field $field ($i) in $relationName has type $type and indexMod $mod.") if T(3); |
979 |
|
if ($mod) { |
980 |
|
# Append the prefix length to the field name, |
981 |
|
$rawFields[$i] .= "($mod)"; |
982 |
|
} |
983 |
|
} |
984 |
|
my @fieldList = _FixNames(@rawFields); |
985 |
my $flds = join(', ', @fieldList); |
my $flds = join(', ', @fieldList); |
986 |
# Get the index's uniqueness flag. |
# Get the index's uniqueness flag. |
987 |
my $unique = (exists $indexData->{Unique} ? 'unique' : undef); |
my $unique = (exists $indexData->{Unique} ? 'unique' : undef); |
996 |
} |
} |
997 |
} |
} |
998 |
|
|
999 |
|
=head3 GetSecondaryFields |
1000 |
|
|
1001 |
|
C<< my %fieldTuples = $erdb->GetSecondaryFields($entityName); >> |
1002 |
|
|
1003 |
|
This method will return a list of the name and type of each of the secondary |
1004 |
|
fields for a specified entity. Secondary fields are stored in two-column tables |
1005 |
|
in addition to the primary entity table. This enables the field to have no value |
1006 |
|
or to have multiple values. |
1007 |
|
|
1008 |
|
=over 4 |
1009 |
|
|
1010 |
|
=item entityName |
1011 |
|
|
1012 |
|
Name of the entity whose secondary fields are desired. |
1013 |
|
|
1014 |
|
=item RETURN |
1015 |
|
|
1016 |
|
Returns a hash mapping the field names to their field types. |
1017 |
|
|
1018 |
|
=back |
1019 |
|
|
1020 |
|
=cut |
1021 |
|
|
1022 |
|
sub GetSecondaryFields { |
1023 |
|
# Get the parameters. |
1024 |
|
my ($self, $entityName) = @_; |
1025 |
|
# Declare the return variable. |
1026 |
|
my %retVal = (); |
1027 |
|
# Look for the entity. |
1028 |
|
my $table = $self->GetFieldTable($entityName); |
1029 |
|
# Loop through the fields, pulling out the secondaries. |
1030 |
|
for my $field (sort keys %{$table}) { |
1031 |
|
if ($table->{$field}->{relation} ne $entityName) { |
1032 |
|
# Here we have a secondary field. |
1033 |
|
$retVal{$field} = $table->{$field}->{type}; |
1034 |
|
} |
1035 |
|
} |
1036 |
|
# Return the result. |
1037 |
|
return %retVal; |
1038 |
|
} |
1039 |
|
|
1040 |
|
=head3 GetFieldRelationName |
1041 |
|
|
1042 |
|
C<< my $name = $erdb->GetFieldRelationName($objectName, $fieldName); >> |
1043 |
|
|
1044 |
|
Return the name of the relation containing a specified field. |
1045 |
|
|
1046 |
|
=over 4 |
1047 |
|
|
1048 |
|
=item objectName |
1049 |
|
|
1050 |
|
Name of the entity or relationship containing the field. |
1051 |
|
|
1052 |
|
=item fieldName |
1053 |
|
|
1054 |
|
Name of the relevant field in that entity or relationship. |
1055 |
|
|
1056 |
|
=item RETURN |
1057 |
|
|
1058 |
|
Returns the name of the database relation containing the field, or C<undef> if |
1059 |
|
the field does not exist. |
1060 |
|
|
1061 |
|
=back |
1062 |
|
|
1063 |
|
=cut |
1064 |
|
|
1065 |
|
sub GetFieldRelationName { |
1066 |
|
# Get the parameters. |
1067 |
|
my ($self, $objectName, $fieldName) = @_; |
1068 |
|
# Declare the return variable. |
1069 |
|
my $retVal; |
1070 |
|
# Get the object field table. |
1071 |
|
my $table = $self->GetFieldTable($objectName); |
1072 |
|
# Only proceed if the field exists. |
1073 |
|
if (exists $table->{$fieldName}) { |
1074 |
|
# Determine the name of the relation that contains this field. |
1075 |
|
$retVal = $table->{$fieldName}->{relation}; |
1076 |
|
} |
1077 |
|
# Return the result. |
1078 |
|
return $retVal; |
1079 |
|
} |
1080 |
|
|
1081 |
|
=head3 DeleteValue |
1082 |
|
|
1083 |
|
C<< my $numDeleted = $erdb->DeleteValue($entityName, $id, $fieldName, $fieldValue); >> |
1084 |
|
|
1085 |
|
Delete secondary field values from the database. This method can be used to delete all |
1086 |
|
values of a specified field for a particular entity instance, or only a single value. |
1087 |
|
|
1088 |
|
Secondary fields are stored in two-column relations separate from an entity's primary |
1089 |
|
table, and as a result a secondary field can legitimately have no value or multiple |
1090 |
|
values. Therefore, it makes sense to talk about deleting secondary fields where it |
1091 |
|
would not make sense for primary fields. |
1092 |
|
|
1093 |
|
=over 4 |
1094 |
|
|
1095 |
|
=item entityName |
1096 |
|
|
1097 |
|
Name of the entity from which the fields are to be deleted. |
1098 |
|
|
1099 |
|
=item id |
1100 |
|
|
1101 |
|
ID of the entity instance to be processed. If the instance is not found, this |
1102 |
|
method will have no effect. If C<undef> is specified, all values for all of |
1103 |
|
the entity instances will be deleted. |
1104 |
|
|
1105 |
|
=item fieldName |
1106 |
|
|
1107 |
|
Name of the field whose values are to be deleted. |
1108 |
|
|
1109 |
|
=item fieldValue (optional) |
1110 |
|
|
1111 |
|
Value to be deleted. If not specified, then all values of the specified field |
1112 |
|
will be deleted for the entity instance. If specified, then only the values which |
1113 |
|
match this parameter will be deleted. |
1114 |
|
|
1115 |
|
=item RETURN |
1116 |
|
|
1117 |
|
Returns the number of rows deleted. |
1118 |
|
|
1119 |
|
=back |
1120 |
|
|
1121 |
|
=cut |
1122 |
|
|
1123 |
|
sub DeleteValue { |
1124 |
|
# Get the parameters. |
1125 |
|
my ($self, $entityName, $id, $fieldName, $fieldValue) = @_; |
1126 |
|
# Declare the return value. |
1127 |
|
my $retVal = 0; |
1128 |
|
# We need to set up an SQL command to do the deletion. First, we |
1129 |
|
# find the name of the field's relation. |
1130 |
|
my $table = $self->GetFieldTable($entityName); |
1131 |
|
my $field = $table->{$fieldName}; |
1132 |
|
my $relation = $field->{relation}; |
1133 |
|
# Make sure this is a secondary field. |
1134 |
|
if ($relation eq $entityName) { |
1135 |
|
Confess("Cannot delete values of $fieldName for $entityName."); |
1136 |
|
} else { |
1137 |
|
# Set up the SQL command to delete all values. |
1138 |
|
my $sql = "DELETE FROM $relation"; |
1139 |
|
# Build the filter. |
1140 |
|
my @filters = (); |
1141 |
|
my @parms = (); |
1142 |
|
# Check for a filter by ID. |
1143 |
|
if (defined $id) { |
1144 |
|
push @filters, "id = ?"; |
1145 |
|
push @parms, $id; |
1146 |
|
} |
1147 |
|
# Check for a filter by value. |
1148 |
|
if (defined $fieldValue) { |
1149 |
|
push @filters, "$fieldName = ?"; |
1150 |
|
push @parms, $fieldValue; |
1151 |
|
} |
1152 |
|
# Append the filters to the command. |
1153 |
|
if (@filters) { |
1154 |
|
$sql .= " WHERE " . join(" AND ", @filters); |
1155 |
|
} |
1156 |
|
# Execute the command. |
1157 |
|
my $dbh = $self->{_dbh}; |
1158 |
|
$retVal = $dbh->SQL($sql, 0, @parms); |
1159 |
|
} |
1160 |
|
# Return the result. |
1161 |
|
return $retVal; |
1162 |
|
} |
1163 |
|
|
1164 |
=head3 LoadTables |
=head3 LoadTables |
1165 |
|
|
1166 |
C<< my $stats = $erdb->LoadTables($directoryName, $rebuild); >> |
C<< my $stats = $erdb->LoadTables($directoryName, $rebuild); >> |
1255 |
return sort keys %{$entityList}; |
return sort keys %{$entityList}; |
1256 |
} |
} |
1257 |
|
|
1258 |
|
=head3 GetDataTypes |
1259 |
|
|
1260 |
|
C<< my %types = ERDB::GetDataTypes(); >> |
1261 |
|
|
1262 |
|
Return a table of ERDB data types. The table returned is a hash of hashes. |
1263 |
|
The keys of the big hash are the datatypes. Each smaller hash has several |
1264 |
|
values used to manage the data. The most interesting is the SQL type (key |
1265 |
|
C<sqlType>) and the descriptive node (key C<notes>). |
1266 |
|
|
1267 |
|
Note that changing the values in the smaller hashes will seriously break |
1268 |
|
things, so this data should be treated as read-only. |
1269 |
|
|
1270 |
|
=cut |
1271 |
|
|
1272 |
|
sub GetDataTypes { |
1273 |
|
return %TypeTable; |
1274 |
|
} |
1275 |
|
|
1276 |
|
|
1277 |
=head3 IsEntity |
=head3 IsEntity |
1278 |
|
|
1279 |
C<< my $flag = $erdb->IsEntity($entityName); >> |
C<< my $flag = $erdb->IsEntity($entityName); >> |
1431 |
|
|
1432 |
=item searchExpression |
=item searchExpression |
1433 |
|
|
1434 |
Boolean search expression for the text fields of the target object. |
Boolean search expression for the text fields of the target object. The default mode for |
1435 |
|
a Boolean search expression is OR, but we want the default to be AND, so we will |
1436 |
|
add a C<+> operator to each word with no other operator before it. |
1437 |
|
|
1438 |
=item idx |
=item idx |
1439 |
|
|
1470 |
my ($self, $searchExpression, $idx, $objectNames, $filterClause, $params) = @_; |
my ($self, $searchExpression, $idx, $objectNames, $filterClause, $params) = @_; |
1471 |
# Declare the return variable. |
# Declare the return variable. |
1472 |
my $retVal; |
my $retVal; |
1473 |
# Create a safety copy of the parameter list. |
# Create a safety copy of the parameter list. Note we have to be careful to insure |
1474 |
my @myParams = @{$params}; |
# a parameter list exists before we copy it. |
1475 |
|
my @myParams = (); |
1476 |
|
if (defined $params) { |
1477 |
|
@myParams = @{$params}; |
1478 |
|
} |
1479 |
# Get the first object's structure so we have access to the searchable fields. |
# Get the first object's structure so we have access to the searchable fields. |
1480 |
my $object1Name = $objectNames->[$idx]; |
my $object1Name = $objectNames->[$idx]; |
1481 |
my $object1Structure = $self->_GetStructure($object1Name); |
my $object1Structure = $self->_GetStructure($object1Name); |
1485 |
} else { |
} else { |
1486 |
# Get the field list. |
# Get the field list. |
1487 |
my @fields = @{$object1Structure->{searchFields}}; |
my @fields = @{$object1Structure->{searchFields}}; |
1488 |
|
# Clean the search expression. |
1489 |
|
my $actualKeywords = $self->CleanKeywords($searchExpression); |
1490 |
|
# Prefix a "+" to each uncontrolled word. This converts the default |
1491 |
|
# search mode from OR to AND. |
1492 |
|
$actualKeywords =~ s/(^|\s)(\w)/$1\+$2/g; |
1493 |
|
Trace("Actual keywords for search are\n$actualKeywords") if T(3); |
1494 |
# We need two match expressions, one for the filter clause and one in the |
# We need two match expressions, one for the filter clause and one in the |
1495 |
# query itself. Both will use a parameter mark, so we need to push the |
# query itself. Both will use a parameter mark, so we need to push the |
1496 |
# search expression onto the front of the parameter list twice. |
# search expression onto the front of the parameter list twice. |
1497 |
unshift @myParams, $searchExpression, $searchExpression; |
unshift @myParams, $actualKeywords, $actualKeywords; |
1498 |
# Build the match expression. |
# Build the match expression. |
1499 |
my @matchFilterFields = map { "$object1Name." . _FixName($_) } @fields; |
my @matchFilterFields = map { "$object1Name." . _FixName($_) } @fields; |
1500 |
my $matchClause = "MATCH (" . join(", ", @matchFilterFields) . ") AGAINST (? IN BOOLEAN MODE)"; |
my $matchClause = "MATCH (" . join(", ", @matchFilterFields) . ") AGAINST (? IN BOOLEAN MODE)"; |
1568 |
return @retVal; |
return @retVal; |
1569 |
} |
} |
1570 |
|
|
1571 |
|
=head3 SpecialFields |
1572 |
|
|
1573 |
|
C<< my %specials = $erdb->SpecialFields($entityName); >> |
1574 |
|
|
1575 |
|
Return a hash mapping special fields in the specified entity to the value of their |
1576 |
|
C<special> attribute. This enables the subclass to get access to the special field |
1577 |
|
attributes without needed to plumb the internal ERDB data structures. |
1578 |
|
|
1579 |
|
=over 4 |
1580 |
|
|
1581 |
|
=item entityName |
1582 |
|
|
1583 |
|
Name of the entity whose special fields are desired. |
1584 |
|
|
1585 |
|
=item RETURN |
1586 |
|
|
1587 |
|
Returns a hash. The keys of the hash are the special field names, and the values |
1588 |
|
are the values from each special field's C<special> attribute. |
1589 |
|
|
1590 |
|
=back |
1591 |
|
|
1592 |
|
=cut |
1593 |
|
|
1594 |
|
sub SpecialFields { |
1595 |
|
# Get the parameters. |
1596 |
|
my ($self, $entityName) = @_; |
1597 |
|
# Declare the return variable. |
1598 |
|
my %retVal = (); |
1599 |
|
# Find the entity's data structure. |
1600 |
|
my $entityData = $self->{_metaData}->{Entities}->{$entityName}; |
1601 |
|
# Loop through its fields, adding each special field to the return hash. |
1602 |
|
my $fieldHash = $entityData->{Fields}; |
1603 |
|
for my $fieldName (keys %{$fieldHash}) { |
1604 |
|
my $fieldData = $fieldHash->{$fieldName}; |
1605 |
|
if (exists $fieldData->{special}) { |
1606 |
|
$retVal{$fieldName} = $fieldData->{special}; |
1607 |
|
} |
1608 |
|
} |
1609 |
|
# Return the result. |
1610 |
|
return %retVal; |
1611 |
|
} |
1612 |
|
|
1613 |
=head3 Delete |
=head3 Delete |
1614 |
|
|
1615 |
C<< my $stats = $erdb->Delete($entityName, $objectID); >> |
C<< my $stats = $erdb->Delete($entityName, $objectID, $testFlag); >> |
1616 |
|
|
1617 |
Delete an entity instance from the database. The instance is deleted along with all entity and |
Delete an entity instance from the database. The instance is deleted along with all entity and |
1618 |
relationship instances dependent on it. The idea of dependence here is recursive. An object is |
relationship instances dependent on it. The definition of I<dependence> is recursive. |
1619 |
always dependent on itself. An object is dependent if it is a 1-to-many or many-to-many |
|
1620 |
relationship connected to a dependent entity or the "to" entity connected to a 1-to-many |
An object is always dependent on itself. An object is dependent if it is a 1-to-many or many-to-many |
1621 |
|
relationship connected to a dependent entity or if it is the "to" entity connected to a 1-to-many |
1622 |
dependent relationship. |
dependent relationship. |
1623 |
|
|
1624 |
=over 4 |
=over 4 |
1667 |
# This final hash is used to remember what work still needs to be done. We push paths |
# This final hash is used to remember what work still needs to be done. We push paths |
1668 |
# onto the list, then pop them off to extend the paths. We prime it with the starting |
# onto the list, then pop them off to extend the paths. We prime it with the starting |
1669 |
# point. Note that we will work hard to insure that the last item on a path in the |
# point. Note that we will work hard to insure that the last item on a path in the |
1670 |
# TODO list is always an entity. |
# to-do list is always an entity. |
1671 |
my @todoList = ([$entityName]); |
my @todoList = ([$entityName]); |
1672 |
while (@todoList) { |
while (@todoList) { |
1673 |
# Get the current path. |
# Get the current path. |
1764 |
# Here the user wants to trace without executing. |
# Here the user wants to trace without executing. |
1765 |
Trace($stmt) if T(0); |
Trace($stmt) if T(0); |
1766 |
} else { |
} else { |
1767 |
# Here we can delete. Note that the SQL method dies with a confessing |
# Here we can delete. Note that the SQL method dies with a confession |
1768 |
# if an error occurs, so we just go ahead and do it. |
# if an error occurs, so we just go ahead and do it. |
1769 |
Trace("Executing delete from $target using '$objectID'.") if T(3); |
Trace("Executing delete from $target using '$objectID'.") if T(3); |
1770 |
my $rv = $db->SQL($stmt, 0, $objectID); |
my $rv = $db->SQL($stmt, 0, $objectID); |
1829 |
} elsif (exists $relationshipTable->{$relationName}) { |
} elsif (exists $relationshipTable->{$relationName}) { |
1830 |
# Here we have a relationship. We sort using the FROM index. |
# Here we have a relationship. We sort using the FROM index. |
1831 |
my $relationshipData = $relationshipTable->{$relationName}; |
my $relationshipData = $relationshipTable->{$relationName}; |
1832 |
my $index = $relationData->{Indexes}->{"idx${relationName}From"}; |
my $index = $relationData->{Indexes}->{idxFrom}; |
1833 |
push @keyNames, @{$index->{IndexFields}}; |
push @keyNames, @{$index->{IndexFields}}; |
1834 |
} else { |
} else { |
1835 |
# Here we have a secondary entity relation, so we have a sort on the ID field. |
# Here we have a secondary entity relation, so we have a sort on the ID field. |
2381 |
}; |
}; |
2382 |
if (!defined $rv) { |
if (!defined $rv) { |
2383 |
$retVal->AddMessage($@) if ($@); |
$retVal->AddMessage($@) if ($@); |
2384 |
$retVal->AddMessage("Table load failed for $relationName using $fileName."); |
$retVal->AddMessage("Table load failed for $relationName using $fileName: " . $dbh->error_message); |
2385 |
Trace("Table load failed for $relationName.") if T(1); |
Trace("Table load failed for $relationName.") if T(1); |
2386 |
} else { |
} else { |
2387 |
# Here we successfully loaded the table. |
# Here we successfully loaded the table. |
2412 |
# Get an SQL-formatted field name list. |
# Get an SQL-formatted field name list. |
2413 |
my $fields = join(", ", $self->_FixNames(@{$structure->{searchFields}})); |
my $fields = join(", ", $self->_FixNames(@{$structure->{searchFields}})); |
2414 |
# Create the index. |
# Create the index. |
2415 |
$dbh->create_index(tbl => $relationName, idx => "search_idx_$relationName", |
$dbh->create_index(tbl => $relationName, idx => "search_idx", |
2416 |
flds => $fields, kind => 'fulltext'); |
flds => $fields, kind => 'fulltext'); |
2417 |
} |
} |
2418 |
} |
} |
2426 |
return $retVal; |
return $retVal; |
2427 |
} |
} |
2428 |
|
|
2429 |
=head3 GenerateEntity |
=head3 DropRelation |
2430 |
|
|
2431 |
C<< my $fieldHash = $erdb->GenerateEntity($id, $type, \%values); >> |
C<< $erdb->DropRelation($relationName); >> |
2432 |
|
|
2433 |
Generate the data for a new entity instance. This method creates a field hash suitable for |
Physically drop a relation from the database. |
|
passing as a parameter to L</InsertObject>. The ID is specified by the callr, but the rest |
|
|
of the fields are generated using information in the database schema. |
|
|
|
|
|
Each data type has a default algorithm for generating random test data. This can be overridden |
|
|
by including a B<DataGen> element in the field. If this happens, the content of the element is |
|
|
executed as a PERL program in the context of this module. The element may make use of a C<$this> |
|
|
variable which contains the field hash as it has been built up to the current point. If any |
|
|
fields are dependent on other fields, the C<pass> attribute can be used to control the order |
|
|
in which the fields are generated. A field with a high data pass number will be generated after |
|
|
a field with a lower one. If any external values are needed, they should be passed in via the |
|
|
optional third parameter, which will be available to the data generation script under the name |
|
|
C<$value>. Several useful utility methods are provided for generating random values, including |
|
|
L</IntGen>, L</StringGen>, L</FloatGen>, and L</DateGen>. Note that dates are stored and generated |
|
|
in the form of a timestamp number rather than a string. |
|
2434 |
|
|
2435 |
=over 4 |
=over 4 |
2436 |
|
|
2437 |
=item id |
=item relationName |
|
|
|
|
ID to assign to the new entity. |
|
|
|
|
|
=item type |
|
|
|
|
|
Type name for the new entity. |
|
|
|
|
|
=item values |
|
2438 |
|
|
2439 |
Hash containing additional values that might be needed by the data generation methods (optional). |
Name of the relation to drop. If it does not exist, this method will have |
2440 |
|
no effect. |
2441 |
|
|
2442 |
=back |
=back |
2443 |
|
|
2444 |
=cut |
=cut |
2445 |
|
|
2446 |
sub GenerateEntity { |
sub DropRelation { |
2447 |
# Get the parameters. |
# Get the parameters. |
2448 |
my ($self, $id, $type, $values) = @_; |
my ($self, $relationName) = @_; |
2449 |
# Create the return hash. |
# Get the database handle. |
2450 |
my $this = { id => $id }; |
my $dbh = $self->{_dbh}; |
2451 |
# Get the metadata structure. |
# Drop the relation. The method used here has no effect if the relation |
2452 |
my $metadata = $self->{_metaData}; |
# does not exist. |
2453 |
# Get this entity's list of fields. |
Trace("Invoking DB Kernel to drop $relationName.") if T(3); |
2454 |
if (!exists $metadata->{Entities}->{$type}) { |
$dbh->drop_table(tbl => $relationName); |
|
Confess("Unrecognized entity type $type in GenerateEntity."); |
|
|
} else { |
|
|
my $entity = $metadata->{Entities}->{$type}; |
|
|
my $fields = $entity->{Fields}; |
|
|
# Generate data from the fields. |
|
|
_GenerateFields($this, $fields, $type, $values); |
|
|
} |
|
|
# Return the hash created. |
|
|
return $this; |
|
2455 |
} |
} |
2456 |
|
|
2457 |
=head3 GetEntity |
=head3 GetEntity |
2785 |
return $objectData->{Fields}; |
return $objectData->{Fields}; |
2786 |
} |
} |
2787 |
|
|
2788 |
=head2 Data Mining Methods |
=head3 SplitKeywords |
2789 |
|
|
2790 |
=head3 GetUsefulCrossValues |
C<< my @keywords = ERDB::SplitKeywords($keywordString); >> |
2791 |
|
|
2792 |
C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >> |
This method returns a list of the positive keywords in the specified |
2793 |
|
keyword string. All of the operators will have been stripped off, |
2794 |
|
and if the keyword is preceded by a minus operator (C<->), it will |
2795 |
|
not be in the list returned. The idea here is to get a list of the |
2796 |
|
keywords the user wants to see. The list will be processed to remove |
2797 |
|
duplicates. |
2798 |
|
|
2799 |
Return a list of the useful attributes that would be returned by a B<Cross> call |
It is possible to create a string that confuses this method. For example |
|
from an entity of the source entity type through the specified relationship. This |
|
|
means it will return the fields of the target entity type and the intersection data |
|
|
fields in the relationship. Only primary table fields are returned. In other words, |
|
|
the field names returned will be for fields where there is always one and only one |
|
|
value. |
|
2800 |
|
|
2801 |
=over 4 |
frog toad -frog |
2802 |
|
|
2803 |
=item sourceEntity |
would return both C<frog> and C<toad>. If this is a problem we can deal |
2804 |
|
with it later. |
2805 |
|
|
2806 |
Name of the entity from which the relationship crossing will start. |
=over 4 |
2807 |
|
|
2808 |
=item relationship |
=item keywordString |
2809 |
|
|
2810 |
Name of the relationship being crossed. |
The keyword string to be parsed. |
2811 |
|
|
2812 |
=item RETURN |
=item RETURN |
2813 |
|
|
2814 |
Returns a list of field names in Sprout field format (I<objectName>C<(>I<fieldName>C<)>. |
Returns a list of the words in the keyword string the user wants to |
2815 |
|
see. |
2816 |
|
|
2817 |
=back |
=back |
2818 |
|
|
2819 |
=cut |
=cut |
2820 |
#: Return Type @; |
|
2821 |
sub GetUsefulCrossValues { |
sub SplitKeywords { |
2822 |
# Get the parameters. |
# Get the parameters. |
2823 |
my ($self, $sourceEntity, $relationship) = @_; |
my ($keywordString) = @_; |
2824 |
# Declare the return variable. |
# Make a safety copy of the string. (This helps during debugging.) |
2825 |
my @retVal = (); |
my $workString = $keywordString; |
2826 |
# Determine the target entity for the relationship. This is whichever entity is not |
# Convert operators we don't care about to spaces. |
2827 |
# the source entity. So, if the source entity is the FROM, we'll get the name of |
$workString =~ tr/+"()<>/ /; |
2828 |
# the TO, and vice versa. |
# Split the rest of the string along space boundaries. Note that we |
2829 |
my $relStructure = $self->_GetStructure($relationship); |
# eliminate any words that are zero length or begin with a minus sign. |
2830 |
my $targetEntityType = ($relStructure->{from} eq $sourceEntity ? "to" : "from"); |
my @wordList = grep { $_ && substr($_, 0, 1) ne "-" } split /\s+/, $workString; |
2831 |
my $targetEntity = $relStructure->{$targetEntityType}; |
# Use a hash to remove duplicates. |
2832 |
# Get the field table for the entity. |
my %words = map { $_ => 1 } @wordList; |
|
my $entityFields = $self->GetFieldTable($targetEntity); |
|
|
# The field table is a hash. The hash key is the field name. The hash value is a structure. |
|
|
# For the entity fields, the key aspect of the target structure is that the {relation} value |
|
|
# must match the entity name. |
|
|
my @fieldList = map { "$targetEntity($_)" } grep { $entityFields->{$_}->{relation} eq $targetEntity } |
|
|
keys %{$entityFields}; |
|
|
# Push the fields found onto the return variable. |
|
|
push @retVal, sort @fieldList; |
|
|
# Get the field table for the relationship. |
|
|
my $relationshipFields = $self->GetFieldTable($relationship); |
|
|
# Here we have a different rule. We want all the fields other than "from-link" and "to-link". |
|
|
# This may end up being an empty set. |
|
|
my @fieldList2 = map { "$relationship($_)" } grep { $_ ne "from-link" && $_ ne "to-link" } |
|
|
keys %{$relationshipFields}; |
|
|
# Push these onto the return list. |
|
|
push @retVal, sort @fieldList2; |
|
2833 |
# Return the result. |
# Return the result. |
2834 |
return @retVal; |
return sort keys %words; |
2835 |
} |
} |
2836 |
|
|
2837 |
=head3 FindColumn |
=head3 ValidateFieldName |
2838 |
|
|
2839 |
C<< my $colIndex = ERDB::FindColumn($headerLine, $columnIdentifier); >> |
C<< my $okFlag = ERDB::ValidateFieldName($fieldName); >> |
2840 |
|
|
2841 |
Return the location a desired column in a data mining header line. The data |
Return TRUE if the specified field name is valid, else FALSE. Valid field names must |
2842 |
mining header line is a tab-separated list of column names. The column |
be hyphenated words subject to certain restrictions. |
|
identifier is either the numerical index of a column or the actual column |
|
|
name. |
|
2843 |
|
|
2844 |
=over 4 |
=over 4 |
2845 |
|
|
2846 |
=item headerLine |
=item fieldName |
|
|
|
|
The header line from a data mining command, which consists of a tab-separated |
|
|
list of column names. |
|
|
|
|
|
=item columnIdentifier |
|
2847 |
|
|
2848 |
Either the ordinal number of the desired column (1-based), or the name of the |
Field name to be validated. |
|
desired column. |
|
2849 |
|
|
2850 |
=item RETURN |
=item RETURN |
2851 |
|
|
2852 |
Returns the array index (0-based) of the desired column. |
Returns TRUE if the field name is valid, else FALSE. |
2853 |
|
|
2854 |
=back |
=back |
2855 |
|
|
2856 |
=cut |
=cut |
2857 |
|
|
2858 |
sub FindColumn { |
sub ValidateFieldName { |
2859 |
# Get the parameters. |
# Get the parameters. |
2860 |
my ($headerLine, $columnIdentifier) = @_; |
my ($fieldName) = @_; |
2861 |
# Declare the return variable. |
# Declare the return variable. The field name is valid until we hear |
2862 |
my $retVal; |
# differently. |
2863 |
# Split the header line into column names. |
my $retVal = 1; |
2864 |
my @headers = ParseColumns($headerLine); |
# Look for bad stuff in the name. |
2865 |
# Determine whether we have a number or a name. |
if ($fieldName =~ /--/) { |
2866 |
if ($columnIdentifier =~ /^\d+$/) { |
# Here we have a doubled minus sign. |
2867 |
|
Trace("Field name $fieldName has a doubled hyphen.") if T(1); |
2868 |
|
$retVal = 0; |
2869 |
|
} elsif ($fieldName !~ /^[A-Za-z]/) { |
2870 |
|
# Here the field name is missing the initial letter. |
2871 |
|
Trace("Field name $fieldName does not begin with a letter.") if T(1); |
2872 |
|
$retVal = 0; |
2873 |
|
} else { |
2874 |
|
# Strip out the minus signs. Everything remaining must be a letter, |
2875 |
|
# underscore, or digit. |
2876 |
|
my $strippedName = $fieldName; |
2877 |
|
$strippedName =~ s/-//g; |
2878 |
|
if ($strippedName !~ /^(\w|\d)+$/) { |
2879 |
|
Trace("Field name $fieldName contains illegal characters.") if T(1); |
2880 |
|
$retVal = 0; |
2881 |
|
} |
2882 |
|
} |
2883 |
|
# Return the result. |
2884 |
|
return $retVal; |
2885 |
|
} |
2886 |
|
|
2887 |
|
=head3 ReadMetaXML |
2888 |
|
|
2889 |
|
C<< my $rawMetaData = ERDB::ReadDBD($fileName); >> |
2890 |
|
|
2891 |
|
This method reads a raw database definition XML file and returns it. |
2892 |
|
Normally, the metadata used by the ERDB system has been processed and |
2893 |
|
modified to make it easier to load and retrieve the data; however, |
2894 |
|
this method can be used to get the data in its raw form. |
2895 |
|
|
2896 |
|
=over 4 |
2897 |
|
|
2898 |
|
=item fileName |
2899 |
|
|
2900 |
|
Name of the XML file to read. |
2901 |
|
|
2902 |
|
=item RETURN |
2903 |
|
|
2904 |
|
Returns a hash reference containing the raw XML data from the specified file. |
2905 |
|
|
2906 |
|
=back |
2907 |
|
|
2908 |
|
=cut |
2909 |
|
|
2910 |
|
sub ReadMetaXML { |
2911 |
|
# Get the parameters. |
2912 |
|
my ($fileName) = @_; |
2913 |
|
# Read the XML. |
2914 |
|
my $retVal = XML::Simple::XMLin($fileName, %XmlOptions, %XmlInOpts); |
2915 |
|
Trace("XML metadata loaded from file $fileName.") if T(1); |
2916 |
|
# Return the result. |
2917 |
|
return $retVal; |
2918 |
|
} |
2919 |
|
|
2920 |
|
=head3 GetEntityFieldHash |
2921 |
|
|
2922 |
|
C<< my $fieldHashRef = ERDB::GetEntityFieldHash($structure, $entityName); >> |
2923 |
|
|
2924 |
|
Get the field hash of the named entity in the specified raw XML structure. |
2925 |
|
The field hash may not exist, in which case we need to create it. |
2926 |
|
|
2927 |
|
=over 4 |
2928 |
|
|
2929 |
|
=item structure |
2930 |
|
|
2931 |
|
Raw XML structure defininng the database. This is not the run-time XML used by |
2932 |
|
an ERDB object, since that has all sorts of optimizations built-in. |
2933 |
|
|
2934 |
|
=item entityName |
2935 |
|
|
2936 |
|
Name of the entity whose field structure is desired. |
2937 |
|
|
2938 |
|
=item RETURN |
2939 |
|
|
2940 |
|
Returns the field hash used to define the entity's fields. |
2941 |
|
|
2942 |
|
=back |
2943 |
|
|
2944 |
|
=cut |
2945 |
|
|
2946 |
|
sub GetEntityFieldHash { |
2947 |
|
# Get the parameters. |
2948 |
|
my ($structure, $entityName) = @_; |
2949 |
|
# Get the entity structure. |
2950 |
|
my $entityData = $structure->{Entities}->{$entityName}; |
2951 |
|
# Look for a field structure. |
2952 |
|
my $retVal = $entityData->{Fields}; |
2953 |
|
# If it doesn't exist, create it. |
2954 |
|
if (! defined($retVal)) { |
2955 |
|
$entityData->{Fields} = {}; |
2956 |
|
$retVal = $entityData->{Fields}; |
2957 |
|
} |
2958 |
|
# Return the result. |
2959 |
|
return $retVal; |
2960 |
|
} |
2961 |
|
|
2962 |
|
=head3 WriteMetaXML |
2963 |
|
|
2964 |
|
C<< ERDB::WriteMetaXML($structure, $fileName); >> |
2965 |
|
|
2966 |
|
Write the metadata XML to a file. This method is the reverse of L</ReadMetaXML>, and is |
2967 |
|
used to update the database definition. It must be used with care, however, since it |
2968 |
|
will only work on a raw structure, not on the processed structure created by an ERDB |
2969 |
|
constructor. |
2970 |
|
|
2971 |
|
=over 4 |
2972 |
|
|
2973 |
|
=item structure |
2974 |
|
|
2975 |
|
XML structure to be written to the file. |
2976 |
|
|
2977 |
|
=item fileName |
2978 |
|
|
2979 |
|
Name of the output file to which the updated XML should be stored. |
2980 |
|
|
2981 |
|
=back |
2982 |
|
|
2983 |
|
=cut |
2984 |
|
|
2985 |
|
sub WriteMetaXML { |
2986 |
|
# Get the parameters. |
2987 |
|
my ($structure, $fileName) = @_; |
2988 |
|
# Compute the output. |
2989 |
|
my $fileString = XML::Simple::XMLout($structure, %XmlOptions, %XmlOutOpts); |
2990 |
|
# Write it to the file. |
2991 |
|
my $xmlOut = Open(undef, ">$fileName"); |
2992 |
|
print $xmlOut $fileString; |
2993 |
|
} |
2994 |
|
|
2995 |
|
|
2996 |
|
=head3 HTMLNote |
2997 |
|
|
2998 |
|
Convert a note or comment to HTML by replacing some bulletin-board codes with HTML. The codes |
2999 |
|
supported are C<[b]> for B<bold>, C<[i]> for I<italics>, and C<[p]> for a new paragraph. |
3000 |
|
Except for C<[p]>, all the codes are closed by slash-codes. So, for |
3001 |
|
example, C<[b]Feature[/b]> displays the string C<Feature> in boldface. |
3002 |
|
|
3003 |
|
C<< my $realHtml = ERDB::HTMLNote($dataString); >> |
3004 |
|
|
3005 |
|
=over 4 |
3006 |
|
|
3007 |
|
=item dataString |
3008 |
|
|
3009 |
|
String to convert to HTML. |
3010 |
|
|
3011 |
|
=item RETURN |
3012 |
|
|
3013 |
|
An HTML string derived from the input string. |
3014 |
|
|
3015 |
|
=back |
3016 |
|
|
3017 |
|
=cut |
3018 |
|
|
3019 |
|
sub HTMLNote { |
3020 |
|
# Get the parameter. |
3021 |
|
my ($dataString) = @_; |
3022 |
|
# HTML-escape the text. |
3023 |
|
my $retVal = CGI::escapeHTML($dataString); |
3024 |
|
# Substitute the bulletin board codes. |
3025 |
|
$retVal =~ s!\[(/?[bi])\]!<$1>!g; |
3026 |
|
$retVal =~ s!\[p\]!</p><p>!g; |
3027 |
|
# Return the result. |
3028 |
|
return $retVal; |
3029 |
|
} |
3030 |
|
|
3031 |
|
|
3032 |
|
=head2 Data Mining Methods |
3033 |
|
|
3034 |
|
=head3 GetUsefulCrossValues |
3035 |
|
|
3036 |
|
C<< my @attrNames = $sprout->GetUsefulCrossValues($sourceEntity, $relationship); >> |
3037 |
|
|
3038 |
|
Return a list of the useful attributes that would be returned by a B<Cross> call |
3039 |
|
from an entity of the source entity type through the specified relationship. This |
3040 |
|
means it will return the fields of the target entity type and the intersection data |
3041 |
|
fields in the relationship. Only primary table fields are returned. In other words, |
3042 |
|
the field names returned will be for fields where there is always one and only one |
3043 |
|
value. |
3044 |
|
|
3045 |
|
=over 4 |
3046 |
|
|
3047 |
|
=item sourceEntity |
3048 |
|
|
3049 |
|
Name of the entity from which the relationship crossing will start. |
3050 |
|
|
3051 |
|
=item relationship |
3052 |
|
|
3053 |
|
Name of the relationship being crossed. |
3054 |
|
|
3055 |
|
=item RETURN |
3056 |
|
|
3057 |
|
Returns a list of field names in Sprout field format (I<objectName>C<(>I<fieldName>C<)>. |
3058 |
|
|
3059 |
|
=back |
3060 |
|
|
3061 |
|
=cut |
3062 |
|
#: Return Type @; |
3063 |
|
sub GetUsefulCrossValues { |
3064 |
|
# Get the parameters. |
3065 |
|
my ($self, $sourceEntity, $relationship) = @_; |
3066 |
|
# Declare the return variable. |
3067 |
|
my @retVal = (); |
3068 |
|
# Determine the target entity for the relationship. This is whichever entity is not |
3069 |
|
# the source entity. So, if the source entity is the FROM, we'll get the name of |
3070 |
|
# the TO, and vice versa. |
3071 |
|
my $relStructure = $self->_GetStructure($relationship); |
3072 |
|
my $targetEntityType = ($relStructure->{from} eq $sourceEntity ? "to" : "from"); |
3073 |
|
my $targetEntity = $relStructure->{$targetEntityType}; |
3074 |
|
# Get the field table for the entity. |
3075 |
|
my $entityFields = $self->GetFieldTable($targetEntity); |
3076 |
|
# The field table is a hash. The hash key is the field name. The hash value is a structure. |
3077 |
|
# For the entity fields, the key aspect of the target structure is that the {relation} value |
3078 |
|
# must match the entity name. |
3079 |
|
my @fieldList = map { "$targetEntity($_)" } grep { $entityFields->{$_}->{relation} eq $targetEntity } |
3080 |
|
keys %{$entityFields}; |
3081 |
|
# Push the fields found onto the return variable. |
3082 |
|
push @retVal, sort @fieldList; |
3083 |
|
# Get the field table for the relationship. |
3084 |
|
my $relationshipFields = $self->GetFieldTable($relationship); |
3085 |
|
# Here we have a different rule. We want all the fields other than "from-link" and "to-link". |
3086 |
|
# This may end up being an empty set. |
3087 |
|
my @fieldList2 = map { "$relationship($_)" } grep { $_ ne "from-link" && $_ ne "to-link" } |
3088 |
|
keys %{$relationshipFields}; |
3089 |
|
# Push these onto the return list. |
3090 |
|
push @retVal, sort @fieldList2; |
3091 |
|
# Return the result. |
3092 |
|
return @retVal; |
3093 |
|
} |
3094 |
|
|
3095 |
|
=head3 FindColumn |
3096 |
|
|
3097 |
|
C<< my $colIndex = ERDB::FindColumn($headerLine, $columnIdentifier); >> |
3098 |
|
|
3099 |
|
Return the location a desired column in a data mining header line. The data |
3100 |
|
mining header line is a tab-separated list of column names. The column |
3101 |
|
identifier is either the numerical index of a column or the actual column |
3102 |
|
name. |
3103 |
|
|
3104 |
|
=over 4 |
3105 |
|
|
3106 |
|
=item headerLine |
3107 |
|
|
3108 |
|
The header line from a data mining command, which consists of a tab-separated |
3109 |
|
list of column names. |
3110 |
|
|
3111 |
|
=item columnIdentifier |
3112 |
|
|
3113 |
|
Either the ordinal number of the desired column (1-based), or the name of the |
3114 |
|
desired column. |
3115 |
|
|
3116 |
|
=item RETURN |
3117 |
|
|
3118 |
|
Returns the array index (0-based) of the desired column. |
3119 |
|
|
3120 |
|
=back |
3121 |
|
|
3122 |
|
=cut |
3123 |
|
|
3124 |
|
sub FindColumn { |
3125 |
|
# Get the parameters. |
3126 |
|
my ($headerLine, $columnIdentifier) = @_; |
3127 |
|
# Declare the return variable. |
3128 |
|
my $retVal; |
3129 |
|
# Split the header line into column names. |
3130 |
|
my @headers = ParseColumns($headerLine); |
3131 |
|
# Determine whether we have a number or a name. |
3132 |
|
if ($columnIdentifier =~ /^\d+$/) { |
3133 |
# Here we have a number. Subtract 1 and validate the result. |
# Here we have a number. Subtract 1 and validate the result. |
3134 |
$retVal = $columnIdentifier - 1; |
$retVal = $columnIdentifier - 1; |
3135 |
if ($retVal < 0 || $retVal > $#headers) { |
if ($retVal < 0 || $retVal > $#headers) { |
3182 |
return @retVal; |
return @retVal; |
3183 |
} |
} |
3184 |
|
|
3185 |
|
=head2 Virtual Methods |
3186 |
|
|
3187 |
|
=head3 CleanKeywords |
3188 |
|
|
3189 |
|
C<< my $cleanedString = $erdb->CleanKeywords($searchExpression); >> |
3190 |
|
|
3191 |
|
Clean up a search expression or keyword list. This is a virtual method that may |
3192 |
|
be overridden by the subclass. The base-class method removes extra spaces |
3193 |
|
and converts everything to lower case. |
3194 |
|
|
3195 |
|
=over 4 |
3196 |
|
|
3197 |
|
=item searchExpression |
3198 |
|
|
3199 |
|
Search expression or keyword list to clean. Note that a search expression may |
3200 |
|
contain boolean operators which need to be preserved. This includes leading |
3201 |
|
minus signs. |
3202 |
|
|
3203 |
|
=item RETURN |
3204 |
|
|
3205 |
|
Cleaned expression or keyword list. |
3206 |
|
|
3207 |
|
=back |
3208 |
|
|
3209 |
|
=cut |
3210 |
|
|
3211 |
|
sub CleanKeywords { |
3212 |
|
# Get the parameters. |
3213 |
|
my ($self, $searchExpression) = @_; |
3214 |
|
# Lower-case the expression and copy it into the return variable. Note that we insure we |
3215 |
|
# don't accidentally end up with an undefined value. |
3216 |
|
my $retVal = lc($searchExpression || ""); |
3217 |
|
# Remove extra spaces. |
3218 |
|
$retVal =~ s/\s+/ /g; |
3219 |
|
$retVal =~ s/(^\s+)|(\s+$)//g; |
3220 |
|
# Return the result. |
3221 |
|
return $retVal; |
3222 |
|
} |
3223 |
|
|
3224 |
|
=head3 GetSourceObject |
3225 |
|
|
3226 |
|
C<< my $source = $erdb->GetSourceObject($entityName); >> |
3227 |
|
|
3228 |
|
Return the object to be used in loading special attributes of the specified entity. The |
3229 |
|
algorithm for loading special attributes is stored in the C<DataGen> elements of the |
3230 |
|
XML |
3231 |
|
|
3232 |
=head2 Internal Utility Methods |
=head2 Internal Utility Methods |
3233 |
|
|
3234 |
=head3 _RelationMap |
=head3 _RelationMap |
3554 |
# Prepare the command. |
# Prepare the command. |
3555 |
my $sth = $dbh->prepare_command($command); |
my $sth = $dbh->prepare_command($command); |
3556 |
# Execute it with the parameters bound in. |
# Execute it with the parameters bound in. |
3557 |
$sth->execute(@{$params}) || Confess("SELECT error" . $sth->errstr()); |
$sth->execute(@{$params}) || Confess("SELECT error: " . $sth->errstr()); |
3558 |
# Return the statement handle. |
# Return the statement handle. |
3559 |
return $sth; |
return $sth; |
3560 |
} |
} |
3571 |
return Stats->new(); |
return Stats->new(); |
3572 |
} |
} |
3573 |
|
|
|
=head3 _GenerateFields |
|
|
|
|
|
Generate field values from a field structure and store in a specified table. The field names |
|
|
are first sorted by pass count, certain pre-defined fields are removed from the list, and |
|
|
then we rip through them evaluation the data generation string. Fields in the primary relation |
|
|
are stored as scalars; fields in secondary relations are stored as value lists. |
|
|
|
|
|
This is a static method. |
|
|
|
|
|
=over 4 |
|
|
|
|
|
=item this |
|
|
|
|
|
Hash table into which the field values should be placed. |
|
|
|
|
|
=item fields |
|
|
|
|
|
Field structure from which the field descriptors should be taken. |
|
|
|
|
|
=item type |
|
|
|
|
|
Type name of the object whose fields are being generated. |
|
|
|
|
|
=item values (optional) |
|
|
|
|
|
Reference to a value structure from which additional values can be taken. |
|
|
|
|
|
=item from (optiona) |
|
|
|
|
|
Reference to the source entity instance if relationship data is being generated. |
|
|
|
|
|
=item to (optional) |
|
|
|
|
|
Reference to the target entity instance if relationship data is being generated. |
|
|
|
|
|
=back |
|
|
|
|
|
=cut |
|
|
|
|
|
sub _GenerateFields { |
|
|
# Get the parameters. |
|
|
my ($this, $fields, $type, $values, $from, $to) = @_; |
|
|
# Sort the field names by pass number. |
|
|
my @fieldNames = sort { $fields->{$a}->{DataGen}->{pass} <=> $fields->{$b}->{DataGen}->{pass} } keys %{$fields}; |
|
|
# Loop through the field names, generating data. |
|
|
for my $name (@fieldNames) { |
|
|
# Only proceed if this field needs to be generated. |
|
|
if (!exists $this->{$name}) { |
|
|
# Get this field's data generation descriptor. |
|
|
my $fieldDescriptor = $fields->{$name}; |
|
|
my $data = $fieldDescriptor->{DataGen}; |
|
|
# Get the code to generate the field value. |
|
|
my $codeString = $data->{content}; |
|
|
# Determine whether or not this field is in the primary relation. |
|
|
if ($fieldDescriptor->{relation} eq $type) { |
|
|
# Here we have a primary relation field. Store the field value as |
|
|
# a scalar. |
|
|
$this->{$name} = eval($codeString); |
|
|
} else { |
|
|
# Here we have a secondary relation field. Create a null list |
|
|
# and push the desired number of field values onto it. |
|
|
my @fieldValues = (); |
|
|
my $count = IntGen(0,$data->{testCount}); |
|
|
for (my $i = 0; $i < $count; $i++) { |
|
|
my $newValue = eval($codeString); |
|
|
push @fieldValues, $newValue; |
|
|
} |
|
|
# Store the value list in the main hash. |
|
|
$this->{$name} = \@fieldValues; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
3574 |
=head3 _DumpRelation |
=head3 _DumpRelation |
3575 |
|
|
3576 |
Dump the specified relation's to the specified output file in tab-delimited format. |
Dump the specified relation to the specified output file in tab-delimited format. |
3577 |
|
|
3578 |
This is an instance method. |
This is an instance method. |
3579 |
|
|
3718 |
for my $object (values %{$metadata->{$section}}) { |
for my $object (values %{$metadata->{$section}}) { |
3719 |
# Loop through the object's fields. |
# Loop through the object's fields. |
3720 |
for my $fieldName (keys %{$object->{Fields}}) { |
for my $fieldName (keys %{$object->{Fields}}) { |
3721 |
# Now we make some initial validations. |
# If this field name is invalid, set the return value to zero |
3722 |
if ($fieldName =~ /--/) { |
# so we know we encountered an error. |
3723 |
# Here we have a doubled minus sign. |
if (! ValidateFieldName($fieldName)) { |
|
print STDERR "Field name $fieldName has a doubled hyphen.\n"; |
|
|
$retVal = 0; |
|
|
} elsif ($fieldName !~ /^[A-Za-z]/) { |
|
|
# Here the field name is missing the initial letter. |
|
|
print STDERR "Field name $fieldName does not begin with a letter.\n"; |
|
|
$retVal = 0; |
|
|
} else { |
|
|
# Strip out the minus signs. Everything remaining must be a letter |
|
|
# or digit. |
|
|
my $strippedName = $fieldName; |
|
|
$strippedName =~ s/-//g; |
|
|
if ($strippedName !~ /^[A-Za-z0-9]+$/) { |
|
|
print STDERR "Field name $fieldName contains illegal characters.\n"; |
|
3724 |
$retVal = 0; |
$retVal = 0; |
3725 |
} |
} |
3726 |
} |
} |
3727 |
} |
} |
3728 |
} |
} |
|
} |
|
3729 |
# If an error was found, fail. |
# If an error was found, fail. |
3730 |
if ($retVal == 0) { |
if ($retVal == 0) { |
3731 |
Confess("Errors found in field names."); |
Confess("Errors found in field names."); |
3792 |
return $retVal; |
return $retVal; |
3793 |
} |
} |
3794 |
|
|
3795 |
|
|
3796 |
=head3 _LoadMetaData |
=head3 _LoadMetaData |
3797 |
|
|
3798 |
This method loads the data describing this database from an XML file into a metadata structure. |
This method loads the data describing this database from an XML file into a metadata structure. |
3818 |
sub _LoadMetaData { |
sub _LoadMetaData { |
3819 |
# Get the parameters. |
# Get the parameters. |
3820 |
my ($filename) = @_; |
my ($filename) = @_; |
3821 |
Trace("Reading Sprout DBD from $filename.") if T(2); |
Trace("Reading DBD from $filename.") if T(2); |
3822 |
# Slurp the XML file into a variable. Extensive use of options is used to insure we |
# Slurp the XML file into a variable. Extensive use of options is used to insure we |
3823 |
# get the exact structure we want. |
# get the exact structure we want. |
3824 |
my $metadata = XML::Simple::XMLin($filename, |
my $metadata = ReadMetaXML($filename); |
|
GroupTags => { Relationships => 'Relationship', |
|
|
Entities => 'Entity', |
|
|
Fields => 'Field', |
|
|
Indexes => 'Index', |
|
|
IndexFields => 'IndexField'}, |
|
|
KeyAttr => { Relationship => 'name', |
|
|
Entity => 'name', |
|
|
Field => 'name'}, |
|
|
ForceArray => ['Field', 'Index', 'IndexField'], |
|
|
ForceContent => 1, |
|
|
NormalizeSpace => 2 |
|
|
); |
|
|
Trace("XML metadata loaded from file $filename.") if T(1); |
|
3825 |
# Before we go any farther, we need to validate the field and object names. If an error is found, |
# Before we go any farther, we need to validate the field and object names. If an error is found, |
3826 |
# the method below will fail. |
# the method below will fail. |
3827 |
_ValidateFieldNames($metadata); |
_ValidateFieldNames($metadata); |
3951 |
my $count = 0; |
my $count = 0; |
3952 |
for my $index (@{$indexList}) { |
for my $index (@{$indexList}) { |
3953 |
# Add this index to the index table. |
# Add this index to the index table. |
3954 |
_AddIndex("idx$relationName$count", $relation, $index); |
_AddIndex("idx$count", $relation, $index); |
3955 |
# Increment the counter so that the next index has a different name. |
# Increment the counter so that the next index has a different name. |
3956 |
$count++; |
$count++; |
3957 |
} |
} |
4149 |
$newIndex->{Unique} = 'true'; |
$newIndex->{Unique} = 'true'; |
4150 |
} |
} |
4151 |
# Add the index to the relation. |
# Add the index to the relation. |
4152 |
_AddIndex("idx$relationshipName$indexKey", $relationStructure, $newIndex); |
_AddIndex("idx$indexKey", $relationStructure, $newIndex); |
4153 |
} |
} |
4154 |
|
|
4155 |
=head3 _AddIndex |
=head3 _AddIndex |
4248 |
my $type = $fieldData->{type}; |
my $type = $fieldData->{type}; |
4249 |
# Plug in a relation name if it is needed. |
# Plug in a relation name if it is needed. |
4250 |
Tracer::MergeOptions($fieldData, { relation => $defaultRelationName }); |
Tracer::MergeOptions($fieldData, { relation => $defaultRelationName }); |
|
# Plug in a data generator if we need one. |
|
|
if (!exists $fieldData->{DataGen}) { |
|
|
# The data generator will use the default for the field's type. |
|
|
$fieldData->{DataGen} = { content => $TypeTable{$type}->{dataGen} }; |
|
|
} |
|
4251 |
# Check for searchability. |
# Check for searchability. |
4252 |
if ($fieldData->{searchable}) { |
if ($fieldData->{searchable}) { |
4253 |
# Only allow this for a primary relation. |
# Only allow this for a primary relation. |
4257 |
push @textFields, $fieldName; |
push @textFields, $fieldName; |
4258 |
} |
} |
4259 |
} |
} |
|
# Plug in the defaults for the optional data generation parameters. |
|
|
Tracer::MergeOptions($fieldData->{DataGen}, { testCount => 1, pass => 0 }); |
|
4260 |
# Add the PrettySortValue. |
# Add the PrettySortValue. |
4261 |
$fieldData->{PrettySort} = (($type eq "text") ? $textPrettySortValue : $prettySortValue); |
$fieldData->{PrettySort} = (($type eq "text") ? $textPrettySortValue : $prettySortValue); |
4262 |
} |
} |
4615 |
$htmlString .= "<li><b>Index $fullName</b>\n<ul>\n"; |
$htmlString .= "<li><b>Index $fullName</b>\n<ul>\n"; |
4616 |
# Add any note text. |
# Add any note text. |
4617 |
if (my $note = $indexData->{Notes}) { |
if (my $note = $indexData->{Notes}) { |
4618 |
$htmlString .= "<li>" . _HTMLNote($note->{content}) . "</li>\n"; |
$htmlString .= "<li>" . HTMLNote($note->{content}) . "</li>\n"; |
4619 |
} |
} |
4620 |
# Add the fiield list. |
# Add the fiield list. |
4621 |
$htmlString .= "<li><i>" . join(', ', @{$indexData->{IndexFields}}) . "</i></li>\n"; |
$htmlString .= "<li><i>" . join(', ', @{$indexData->{IndexFields}}) . "</i></li>\n"; |
4730 |
my $htmlString = "<tr><th align=\"left\">$fieldData->{name}</th><td>$fieldData->{type}</td>"; |
my $htmlString = "<tr><th align=\"left\">$fieldData->{name}</th><td>$fieldData->{type}</td>"; |
4731 |
# If we have content, add it as a third column. |
# If we have content, add it as a third column. |
4732 |
if (exists $fieldData->{Notes}) { |
if (exists $fieldData->{Notes}) { |
4733 |
$htmlString .= "<td>" . _HTMLNote($fieldData->{Notes}->{content}) . "</td>"; |
$htmlString .= "<td>" . HTMLNote($fieldData->{Notes}->{content}) . "</td>"; |
4734 |
} |
} |
4735 |
# Close off the row. |
# Close off the row. |
4736 |
$htmlString .= "</tr>\n"; |
$htmlString .= "</tr>\n"; |
4738 |
return $htmlString; |
return $htmlString; |
4739 |
} |
} |
4740 |
|
|
|
=head3 _HTMLNote |
|
|
|
|
|
Convert a note or comment to HTML by replacing some bulletin-board codes with HTML. The codes |
|
|
supported are C<[b]> for B<bold>, C<[i]> for I<italics>, and C<[p]> for a new paragraph. |
|
|
Except for C<[p]>, all the codes are closed by slash-codes. So, for |
|
|
example, C<[b]Feature[/b]> displays the string C<Feature> in boldface. |
|
|
|
|
|
This is a static method. |
|
|
|
|
|
=over 4 |
|
|
|
|
|
=item dataString |
|
|
|
|
|
String to convert to HTML. |
|
|
|
|
|
=item RETURN |
|
|
|
|
|
An HTML string derived from the input string. |
|
|
|
|
|
=back |
|
|
|
|
|
=cut |
|
|
|
|
|
sub _HTMLNote { |
|
|
# Get the parameter. |
|
|
my ($dataString) = @_; |
|
|
# Substitute the codes. |
|
|
$dataString =~ s!\[(/?[bi])\]!<$1>!g; |
|
|
$dataString =~ s!\[p\]!</p><p>!g; |
|
|
# Return the result. |
|
|
return $dataString; |
|
|
} |
|
|
|
|
|
=head2 Data Generation Utilities |
|
|
|
|
|
=head3 IntGen |
|
|
|
|
|
C<< my $integer = IntGen($min, $max); >> |
|
|
|
|
|
Returns a random number between the specified minimum and maximum (inclusive). |
|
|
|
|
|
=over 4 |
|
|
|
|
|
=item min |
|
|
|
|
|
Minimum permissible return value. |
|
|
|
|
|
=item max |
|
|
|
|
|
Maximum permissible return value. |
|
|
|
|
|
=item RETURN |
|
|
|
|
|
Returns a value no lower than the minimum and no greater than the maximum. |
|
|
|
|
|
=back |
|
|
|
|
|
=cut |
|
|
|
|
|
sub IntGen { |
|
|
# Get the parameters. |
|
|
my ($min, $max) = @_; |
|
|
# Determine the range of possible values. Note we put some space well above the |
|
|
# maximum value to give it a fighting chance of apppearing in the list. |
|
|
my $span = $max + 0.99 - $min; |
|
|
# Create an integer in the range. |
|
|
my $retVal = $min + int(rand($span)); |
|
|
# Return the result. |
|
|
return $retVal; |
|
|
} |
|
|
|
|
|
=head3 RandChar |
|
|
|
|
|
C<< my $char = RandChar($sourceString); >> |
|
|
|
|
|
Select a random character from a string. |
|
|
|
|
|
=over 4 |
|
|
|
|
|
=item sourceString |
|
|
|
|
|
String from which the random character should be selected. |
|
|
|
|
|
=item RETURN |
|
|
|
|
|
Returns a single character from the incoming string. |
|
|
|
|
|
=back |
|
|
|
|
|
=cut |
|
|
|
|
|
sub RandChar { |
|
|
# Get the parameter. |
|
|
my ($sourceString) = @_; |
|
|
# Select a random character. |
|
|
my $retVal = IntGen(0, (length $sourceString) - 1); |
|
|
# Return it. |
|
|
return substr($sourceString, $retVal, 1); |
|
|
} |
|
|
|
|
|
=head3 RandChars |
|
|
|
|
|
C<< my $string = RandChars($sourceString, $length); >> |
|
|
|
|
|
Create a string from characters taken from a source string. |
|
|
|
|
|
=over 4 |
|
|
|
|
|
=item sourceString |
|
|
|
|
|
String from which the random characters should be selected. |
|
|
|
|
|
=item length |
|
|
|
|
|
Number of characters to put in the output string. |
|
|
|
|
|
=item RETURN |
|
|
|
|
|
Returns a string of the specified length consisting of characters taken from the |
|
|
source string. |
|
|
|
|
|
=back |
|
|
|
|
|
=cut |
|
|
|
|
|
sub RandChars { |
|
|
# Get the parameters. |
|
|
my ($sourceString, $length) = @_; |
|
|
# Call RandChar repeatedly to generate the string. |
|
|
my $retVal = ""; |
|
|
for (my $i = 0; $i < $length; $i++) { |
|
|
$retVal .= RandChar($sourceString); |
|
|
} |
|
|
# Return the result. |
|
|
return $retVal; |
|
|
} |
|
|
|
|
|
=head3 RandParam |
|
|
|
|
|
C<< my $value = RandParam($parm1, $parm2, ... $parmN); >> |
|
|
|
|
|
Return a randomly-selected value from the parameter list. |
|
|
|
|
|
=over 4 |
|
|
|
|
|
=item parm1, parm2, ... parmN |
|
|
|
|
|
List of values of which one will be selected. |
|
|
|
|
|
=item RETURN |
|
|
|
|
|
Returns a randomly-chosen value from the specified list. |
|
|
|
|
|
=back |
|
|
|
|
|
=cut |
|
|
|
|
|
sub RandParam { |
|
|
# Get the parameter. |
|
|
my @parms = @_; |
|
|
# Choose a random parameter from the list. |
|
|
my $chosenIndex = IntGen(0, $#parms); |
|
|
return $parms[$chosenIndex]; |
|
|
} |
|
|
|
|
|
=head3 StringGen |
|
|
|
|
|
C<< my $string = StringGen($pattern1, $pattern2, ... $patternN); >> |
|
|
|
|
|
Returns a random string derived from a randomly-chosen format pattern. The pattern |
|
|
can either be a number (indicating the number of characters desired, or the letter |
|
|
C<P> followed by a picture. The picture should contain C<A> when a letter is desired, |
|
|
C<9> when a digit is desired, C<V> when a vowel is desired, C<K> when a consonant is |
|
|
desired, and C<X> when a letter or a digit is desired. Any other character will be |
|
|
translated as a literal. |
|
|
|
|
|
=over 4 |
|
|
|
|
|
=item pattern1, pattern2, ... patternN |
|
|
|
|
|
List of patterns to be used to generate string values. |
|
|
|
|
|
=item RETURN |
|
|
|
|
|
A single string generated from a pattern. |
|
|
|
|
|
=back |
|
|
|
|
|
=cut |
|
|
|
|
|
sub StringGen { |
|
|
# Get the parameters. |
|
|
my @patterns = @_; |
|
|
# Choose the appropriate pattern. |
|
|
my $chosenPattern = RandParam(@patterns); |
|
|
# Declare the return variable. |
|
|
my $retVal = ""; |
|
|
# Determine whether this is a count or a picture pattern. |
|
|
if ($chosenPattern =~ m/^\d+/) { |
|
|
# Here we have a count. Get the string of source characters. |
|
|
my $letterString = $PictureTable{'X'}; |
|
|
my $stringLen = length $letterString; |
|
|
# Save the number of characters we have to generate. |
|
|
my $charsLeft = $chosenPattern; |
|
|
# Loop until the return variable is full. |
|
|
while ($charsLeft > 0) { |
|
|
# Generate a random position in the soruce string. |
|
|
my $stringIndex = IntGen(0, $stringLen - 1); |
|
|
# Compute the number of characters to pull out of the source string. |
|
|
my $chunkSize = $stringLen - $stringIndex; |
|
|
if ($chunkSize > $charsLeft) { $chunkSize = $charsLeft; } |
|
|
# Stuff this chunk into the return value. |
|
|
$retVal .= substr($letterString, $stringIndex, $chunkSize); |
|
|
# Record the data moved. |
|
|
$charsLeft -= $chunkSize; |
|
|
} |
|
|
} elsif ($chosenPattern =~ m/^P/) { |
|
|
# Here we have a picture string. We will move through the picture one |
|
|
# character at a time generating data. |
|
|
for (my $i = 1; $i < length $chosenPattern; $i++) { |
|
|
# Get this picture character. |
|
|
my $chr = substr($chosenPattern, $i, 1); |
|
|
# Check to see if the picture char is one we recognize. |
|
|
if (exists $PictureTable{$chr}) { |
|
|
# Choose a random character from the available values for this |
|
|
# picture character. |
|
|
$retVal .= RandChar($PictureTable{$chr}); |
|
|
} else { |
|
|
# Copy in the picture character as a literal. |
|
|
$retVal .= $chr; |
|
|
} |
|
|
} |
|
|
} else { |
|
|
# Here we have neither a picture string or a letter count, so we treat |
|
|
# the string as a literal. |
|
|
$retVal = $chosenPattern; |
|
|
} |
|
|
# Return the string formed. |
|
|
return $retVal; |
|
|
} |
|
|
|
|
|
=head3 DateGen |
|
|
|
|
|
C<< my $date = DateGen($startDayOffset, $endDayOffset, $minutes); >> |
|
|
|
|
|
Return a numeric timestamp within the specified range of days with the specified minute |
|
|
value. The range of days is specified relevant to the current day. Thus, the call |
|
|
|
|
|
C<< my $date = DateGen(-1, 5, 720); >> |
|
|
|
|
|
will return a timestamp at noon (72 minutes past midnight) sometime during the week that |
|
|
began on the preceding day. If you want a random minute of the day, simply combine with |
|
|
a call to L</IntGen>, as follows. |
|
|
|
|
|
C<< my $date = DateGen(-1, 5, IntGen(0, 1439)); >> |
|
|
|
|
|
=over 4 |
|
|
|
|
|
=item startDayOffset |
|
|
|
|
|
The earliest day that can be returned, relative to the current day. |
|
|
|
|
|
=item endDayOffset |
|
|
|
|
|
The latest day that can be returned, related to the current day. |
|
|
|
|
|
=item minutes |
|
|
|
|
|
Number of minutes into the selected day that should be used. |
|
|
|
|
|
=back |
|
|
|
|
|
=cut |
|
|
|
|
|
sub DateGen { |
|
|
# Get the parameters. |
|
|
my ($startDayOffset, $endDayOffset, $minutes) = @_; |
|
|
# Get midnight of the current day. |
|
|
my $now = time(); |
|
|
my ($sec, $min, $hour) = localtime($now); |
|
|
my $today = $now - (($hour * 60 + $min) * 60 + $sec); |
|
|
# Compute the day we want. |
|
|
my $newDay = IntGen($startDayOffset, $endDayOffset) * 86400 + $today; |
|
|
# Add the minutes. |
|
|
my $retVal = $newDay + $minutes * 60; |
|
|
# Return the result. |
|
|
return $retVal; |
|
|
} |
|
|
|
|
|
=head3 FloatGen |
|
|
|
|
|
C<< my $number = FloatGen($min, $max); >> |
|
|
|
|
|
Return a random floating-point number greater than or equal to the specified minimum and |
|
|
less than the specified maximum. |
|
|
|
|
|
=over 4 |
|
|
|
|
|
=item min |
|
|
|
|
|
Minimum permissible value for the number returned. |
|
|
|
|
|
=item max |
|
|
|
|
|
Maximum permissible value for the number returned. |
|
|
|
|
|
=item RETURN |
|
|
|
|
|
Returns a floating-point number anywhere in the specified range. |
|
|
|
|
|
=back |
|
|
|
|
|
=cut |
|
|
|
|
|
sub FloatGen { |
|
|
# Get the parameters. |
|
|
my ($min, $max) = @_; |
|
|
# Generate the result. |
|
|
my $retVal = rand($max - $min) + $min; |
|
|
return $retVal; |
|
|
} |
|
|
|
|
|
=head3 ListGen |
|
|
|
|
|
C<< my @list = ListGen($pattern, $count); >> |
|
|
|
|
|
Return a list containing a fixed number of randomly-generated strings. |
|
|
|
|
|
=over 4 |
|
|
|
|
|
=item pattern |
|
|
|
|
|
A pattern (in the form expected by L</StringGen>) that should be used to generate the |
|
|
strings in the list. |
|
|
|
|
|
=item count |
|
|
|
|
|
The number of list entries to generate. |
|
|
|
|
|
=item RETURN |
|
|
|
|
|
Returns a list consisting of the specified number of strings. |
|
|
|
|
|
=back |
|
|
|
|
|
=cut |
|
|
|
|
|
sub ListGen { |
|
|
# Get the parameters. |
|
|
my ($pattern, $count) = @_; |
|
|
# Generate the list. |
|
|
my @retVal = (); |
|
|
for (my $i = 0; $i < $count; $i++) { |
|
|
push @retVal, StringGen($pattern); |
|
|
} |
|
|
# Return it. |
|
|
return @retVal; |
|
|
} |
|
|
|
|
4741 |
1; |
1; |