91 |
|
|
92 |
32-bit signed integer |
32-bit signed integer |
93 |
|
|
94 |
|
=item counter |
95 |
|
|
96 |
|
32-bit unsigned integer |
97 |
|
|
98 |
=item date |
=item date |
99 |
|
|
100 |
64-bit unsigned integer, representing a PERL date/time value |
64-bit unsigned integer, representing a PERL date/time value |
322 |
# "maxLen" is the maximum permissible length of the incoming string data used to populate a field |
# "maxLen" is the maximum permissible length of the incoming string data used to populate a field |
323 |
# of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation |
# of the specified type. "dataGen" is PERL string that will be evaluated if no test data generation |
324 |
# string is specified in the field definition. "avgLen" is the average byte length for estimating |
# string is specified in the field definition. "avgLen" is the average byte length for estimating |
325 |
# record sizes. |
# record sizes. "sort" is the key modifier for the sort command. |
326 |
my %TypeTable = ( char => { sqlType => 'CHAR(1)', maxLen => 1, avgLen => 1, dataGen => "StringGen('A')" }, |
my %TypeTable = ( char => { sqlType => 'CHAR(1)', maxLen => 1, avgLen => 1, sort => "", dataGen => "StringGen('A')" }, |
327 |
int => { sqlType => 'INTEGER', maxLen => 20, avgLen => 4, dataGen => "IntGen(0, 99999999)" }, |
int => { sqlType => 'INTEGER', maxLen => 20, avgLen => 4, sort => "n", dataGen => "IntGen(0, 99999999)" }, |
328 |
string => { sqlType => 'VARCHAR(255)', maxLen => 255, avgLen => 100, dataGen => "StringGen(IntGen(10,250))" }, |
counter => { sqlType => 'INTEGER UNSIGNED', maxLen => 20, avgLen => 4, sort => "n", dataGen => "IntGen(0, 99999999)" }, |
329 |
text => { sqlType => 'TEXT', maxLen => 1000000000, avgLen => 500, dataGen => "StringGen(IntGen(80,1000))" }, |
string => { sqlType => 'VARCHAR(255)', maxLen => 255, avgLen => 100, sort => "", dataGen => "StringGen(IntGen(10,250))" }, |
330 |
date => { sqlType => 'BIGINT', maxLen => 80, avgLen => 8, dataGen => "DateGen(-7, 7, IntGen(0,1400))" }, |
text => { sqlType => 'TEXT', maxLen => 1000000000, avgLen => 500, sort => "", dataGen => "StringGen(IntGen(80,1000))" }, |
331 |
float => { sqlType => 'DOUBLE PRECISION', maxLen => 40, avgLen => 8, dataGen => "FloatGen(0.0, 100.0)" }, |
date => { sqlType => 'BIGINT', maxLen => 80, avgLen => 8, sort => "n", dataGen => "DateGen(-7, 7, IntGen(0,1400))" }, |
332 |
boolean => { sqlType => 'SMALLINT', maxLen => 1, avgLen => 1, dataGen => "IntGen(0, 1)" }, |
float => { sqlType => 'DOUBLE PRECISION', maxLen => 40, avgLen => 8, sort => "g", dataGen => "FloatGen(0.0, 100.0)" }, |
333 |
|
boolean => { sqlType => 'SMALLINT', maxLen => 1, avgLen => 1, sort => "n", dataGen => "IntGen(0, 1)" }, |
334 |
'hash-string' => |
'hash-string' => |
335 |
{ sqlType => 'VARCHAR(22)', maxLen => 22, avgLen => 22, dataGen => "SringGen(22)" }, |
{ sqlType => 'VARCHAR(22)', maxLen => 22, avgLen => 22, sort => "", dataGen => "SringGen(22)" }, |
336 |
'id-string' => |
'id-string' => |
337 |
{ sqlType => 'VARCHAR(25)', maxLen => 25, avgLen => 25, dataGen => "SringGen(22)" }, |
{ sqlType => 'VARCHAR(25)', maxLen => 25, avgLen => 25, sort => "", dataGen => "SringGen(22)" }, |
338 |
'key-string' => |
'key-string' => |
339 |
{ sqlType => 'VARCHAR(40)', maxLen => 40, avgLen => 10, dataGen => "StringGen(IntGen(10,40))" }, |
{ sqlType => 'VARCHAR(40)', maxLen => 40, avgLen => 10, sort => "", dataGen => "StringGen(IntGen(10,40))" }, |
340 |
'name-string' => |
'name-string' => |
341 |
{ sqlType => 'VARCHAR(80)', maxLen => 80, avgLen => 40, dataGen => "StringGen(IntGen(10,80))" }, |
{ sqlType => 'VARCHAR(80)', maxLen => 80, avgLen => 40, sort => "", dataGen => "StringGen(IntGen(10,80))" }, |
342 |
'medium-string' => |
'medium-string' => |
343 |
{ sqlType => 'VARCHAR(160)', maxLen => 160, avgLen => 40, dataGen => "StringGen(IntGen(10,160))" }, |
{ sqlType => 'VARCHAR(160)', maxLen => 160, avgLen => 40, sort => "", dataGen => "StringGen(IntGen(10,160))" }, |
344 |
); |
); |
345 |
|
|
346 |
# Table translating arities into natural language. |
# Table translating arities into natural language. |
1436 |
sub GetCount { |
sub GetCount { |
1437 |
# Get the parameters. |
# Get the parameters. |
1438 |
my ($self, $objectNames, $filter, $params) = @_; |
my ($self, $objectNames, $filter, $params) = @_; |
1439 |
|
# Insure the params argument is an array reference if the caller left it off. |
1440 |
|
if (! defined($params)) { |
1441 |
|
$params = []; |
1442 |
|
} |
1443 |
# Declare the return variable. |
# Declare the return variable. |
1444 |
my $retVal; |
my $retVal; |
1445 |
# Find out if we're counting an entity or a relationship. |
# Find out if we're counting an entity or a relationship. |
1595 |
my $entityData = $self->{_metaData}->{Entities}->{$entityName}; |
my $entityData = $self->{_metaData}->{Entities}->{$entityName}; |
1596 |
# Find the relation containing this field. |
# Find the relation containing this field. |
1597 |
my $fieldHash = $entityData->{Fields}; |
my $fieldHash = $entityData->{Fields}; |
1598 |
if (! exist $fieldHash->{$fieldTitle}) { |
if (! exists $fieldHash->{$fieldTitle}) { |
1599 |
Confess("$fieldTitle not found in $entityName."); |
Confess("$fieldTitle not found in $entityName."); |
1600 |
} else { |
} else { |
1601 |
my $relation = $fieldHash->{$fieldTitle}->{relation}; |
my $relation = $fieldHash->{$fieldTitle}->{relation}; |
1605 |
# Now we can create an INSERT statement. |
# Now we can create an INSERT statement. |
1606 |
my $dbh = $self->{_dbh}; |
my $dbh = $self->{_dbh}; |
1607 |
my $fixedName = _FixName($fieldTitle); |
my $fixedName = _FixName($fieldTitle); |
1608 |
my $statement = "INSERT INTO $relation ($fixedName) VALUES(?, ?)"; |
my $statement = "INSERT INTO $relation (id, $fixedName) VALUES(?, ?)"; |
1609 |
# Execute the command. |
# Execute the command. |
1610 |
$dbh->SQL($statement, 0, $entityID, $value); |
$dbh->SQL($statement, 0, $entityID, $value); |
1611 |
} |
} |
1632 |
The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and |
The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and |
1633 |
property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>. |
property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>. |
1634 |
|
|
1635 |
C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >> |
C<< $erdb->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence => 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >> |
1636 |
|
|
1637 |
=over 4 |
=over 4 |
1638 |
|
|
1842 |
} |
} |
1843 |
} |
} |
1844 |
# Analyze the table to improve performance. |
# Analyze the table to improve performance. |
1845 |
|
Trace("Analyzing and compacting $relationName.") if T(3); |
1846 |
$dbh->vacuum_it($relationName); |
$dbh->vacuum_it($relationName); |
1847 |
|
Trace("$relationName load completed.") if T(3); |
1848 |
# Return the statistics. |
# Return the statistics. |
1849 |
return $retVal; |
return $retVal; |
1850 |
} |
} |
1943 |
return $retVal; |
return $retVal; |
1944 |
} |
} |
1945 |
|
|
1946 |
|
=head3 GetChoices |
1947 |
|
|
1948 |
|
C<< my @values = $erdb->GetChoices($entityName, $fieldName); >> |
1949 |
|
|
1950 |
|
Return a list of all the values for the specified field that are represented in the |
1951 |
|
specified entity. |
1952 |
|
|
1953 |
|
Note that if the field is not indexed, then this will be a very slow operation. |
1954 |
|
|
1955 |
|
=over 4 |
1956 |
|
|
1957 |
|
=item entityName |
1958 |
|
|
1959 |
|
Name of an entity in the database. |
1960 |
|
|
1961 |
|
=item fieldName |
1962 |
|
|
1963 |
|
Name of a field belonging to the entity. This is a raw field name without |
1964 |
|
the standard parenthesized notation used in most calls. |
1965 |
|
|
1966 |
|
=item RETURN |
1967 |
|
|
1968 |
|
Returns a list of the distinct values for the specified field in the database. |
1969 |
|
|
1970 |
|
=back |
1971 |
|
|
1972 |
|
=cut |
1973 |
|
|
1974 |
|
sub GetChoices { |
1975 |
|
# Get the parameters. |
1976 |
|
my ($self, $entityName, $fieldName) = @_; |
1977 |
|
# Declare the return variable. |
1978 |
|
my @retVal; |
1979 |
|
# Get the entity data structure. |
1980 |
|
my $entityData = $self->_GetStructure($entityName); |
1981 |
|
# Get the field. |
1982 |
|
my $fieldHash = $entityData->{Fields}; |
1983 |
|
if (! exists $fieldHash->{$fieldName}) { |
1984 |
|
Confess("$fieldName not found in $entityName."); |
1985 |
|
} else { |
1986 |
|
# Get the name of the relation containing the field. |
1987 |
|
my $relation = $fieldHash->{$fieldName}->{relation}; |
1988 |
|
# Fix up the field name. |
1989 |
|
my $realName = _FixName($fieldName); |
1990 |
|
# Get the database handle. |
1991 |
|
my $dbh = $self->{_dbh}; |
1992 |
|
# Query the database. |
1993 |
|
my $results = $dbh->SQL("SELECT DISTINCT $realName FROM $relation"); |
1994 |
|
# Clean the results. They are stored as a list of lists, and we just want the one list. |
1995 |
|
@retVal = sort map { $_->[0] } @{$results}; |
1996 |
|
} |
1997 |
|
# Return the result. |
1998 |
|
return @retVal; |
1999 |
|
} |
2000 |
|
|
2001 |
=head3 GetEntityValues |
=head3 GetEntityValues |
2002 |
|
|
2003 |
C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >> |
C<< my @values = $erdb->GetEntityValues($entityType, $ID, \@fields); >> |
2004 |
|
|
2005 |
Return a list of values from a specified entity instance. |
Return a list of values from a specified entity instance. If the entity instance |
2006 |
|
does not exist, an empty list is returned. |
2007 |
|
|
2008 |
=over 4 |
=over 4 |
2009 |
|
|
2131 |
push @retVal, \@rowData; |
push @retVal, \@rowData; |
2132 |
$fetched++; |
$fetched++; |
2133 |
} |
} |
2134 |
|
Trace("$fetched rows returned in GetAll.") if T(SQL => 4); |
2135 |
# Return the resulting list. |
# Return the resulting list. |
2136 |
return @retVal; |
return @retVal; |
2137 |
} |
} |
2138 |
|
|
2139 |
|
=head3 Exists |
2140 |
|
|
2141 |
|
C<< my $found = $sprout->Exists($entityName, $entityID); >> |
2142 |
|
|
2143 |
|
Return TRUE if an entity exists, else FALSE. |
2144 |
|
|
2145 |
|
=over 4 |
2146 |
|
|
2147 |
|
=item entityName |
2148 |
|
|
2149 |
|
Name of the entity type (e.g. C<Feature>) relevant to the existence check. |
2150 |
|
|
2151 |
|
=item entityID |
2152 |
|
|
2153 |
|
ID of the entity instance whose existence is to be checked. |
2154 |
|
|
2155 |
|
=item RETURN |
2156 |
|
|
2157 |
|
Returns TRUE if the entity instance exists, else FALSE. |
2158 |
|
|
2159 |
|
=back |
2160 |
|
|
2161 |
|
=cut |
2162 |
|
#: Return Type $; |
2163 |
|
sub Exists { |
2164 |
|
# Get the parameters. |
2165 |
|
my ($self, $entityName, $entityID) = @_; |
2166 |
|
# Check for the entity instance. |
2167 |
|
Trace("Checking existence of $entityName with ID=$entityID.") if T(4); |
2168 |
|
my $testInstance = $self->GetEntity($entityName, $entityID); |
2169 |
|
# Return an existence indicator. |
2170 |
|
my $retVal = ($testInstance ? 1 : 0); |
2171 |
|
return $retVal; |
2172 |
|
} |
2173 |
|
|
2174 |
=head3 EstimateRowSize |
=head3 EstimateRowSize |
2175 |
|
|
2176 |
C<< my $rowSize = $erdb->EstimateRowSize($relName); >> |
C<< my $rowSize = $erdb->EstimateRowSize($relName); >> |
3300 |
|
|
3301 |
=head3 SortNeeded |
=head3 SortNeeded |
3302 |
|
|
3303 |
C<< my $flag = $erdb->SortNeeded($relationName); >> |
C<< my $parms = $erdb->SortNeeded($relationName); >> |
3304 |
|
|
3305 |
|
Return the pipe command for the sort that should be applied to the specified |
3306 |
|
relation when creating the load file. |
3307 |
|
|
3308 |
|
For example, if the load file should be sorted ascending by the first |
3309 |
|
field, this method would return |
3310 |
|
|
3311 |
|
sort -k1 -t"\t" |
3312 |
|
|
3313 |
Return TRUE if the specified relation should be sorted during loading to remove duplicate keys, |
If the first field is numeric, the method would return |
3314 |
else FALSE. |
|
3315 |
|
sort -k1n -t"\t" |
3316 |
|
|
3317 |
|
Unfortunately, due to a bug in the C<sort> command, we cannot eliminate duplicate |
3318 |
|
keys using a sort. |
3319 |
|
|
3320 |
=over 4 |
=over 4 |
3321 |
|
|
3323 |
|
|
3324 |
Name of the relation to be examined. |
Name of the relation to be examined. |
3325 |
|
|
3326 |
=item RETURN |
=item |
3327 |
|
|
3328 |
Returns TRUE if the relation needs a sort, else FALSE. |
Returns the sort command to use for sorting the relation, suitable for piping. |
3329 |
|
|
3330 |
=back |
=back |
3331 |
|
|
3334 |
sub SortNeeded { |
sub SortNeeded { |
3335 |
# Get the parameters. |
# Get the parameters. |
3336 |
my ($self, $relationName) = @_; |
my ($self, $relationName) = @_; |
3337 |
# Declare the return variable. |
# Declare a descriptor to hold the names of the key fields. |
3338 |
my $retVal = 0; |
my @keyNames = (); |
3339 |
# Find out if the relation is a primary entity relation. |
# Get the relation structure. |
3340 |
|
my $relationData = $self->_FindRelation($relationName); |
3341 |
|
# Find out if the relation is a primary entity relation, |
3342 |
|
# a relationship relation, or a secondary entity relation. |
3343 |
my $entityTable = $self->{_metaData}->{Entities}; |
my $entityTable = $self->{_metaData}->{Entities}; |
3344 |
|
my $relationshipTable = $self->{_metaData}->{Relationships}; |
3345 |
if (exists $entityTable->{$relationName}) { |
if (exists $entityTable->{$relationName}) { |
3346 |
my $keyType = $entityTable->{$relationName}->{keyType}; |
# Here we have a primary entity relation. |
3347 |
Trace("Relation $relationName found in entity table with key type $keyType.") if T(3); |
push @keyNames, "id"; |
3348 |
# If the key is not a hash string, we must do the sort. |
} elsif (exists $relationshipTable->{$relationName}) { |
3349 |
if ($keyType ne 'hash-string') { |
# Here we have a relationship. We sort using the FROM index. |
3350 |
$retVal = 1; |
my $relationshipData = $relationshipTable->{$relationName}; |
3351 |
|
my $index = $relationData->{Indexes}->{"idx${relationName}From"}; |
3352 |
|
push @keyNames, @{$index->{IndexFields}}; |
3353 |
|
} else { |
3354 |
|
# Here we have a secondary entity relation, so we have a sort on the ID field. |
3355 |
|
push @keyNames, "id"; |
3356 |
|
} |
3357 |
|
# Now we parse the key names into sort parameters. First, we prime the return |
3358 |
|
# string. |
3359 |
|
my $retVal = "sort -t\"\t\" "; |
3360 |
|
# Get the relation's field list. |
3361 |
|
my @fields = @{$relationData->{Fields}}; |
3362 |
|
# Loop through the keys. |
3363 |
|
for my $keyData (@keyNames) { |
3364 |
|
# Get the key and the ordering. |
3365 |
|
my ($keyName, $ordering); |
3366 |
|
if ($keyData =~ /^([^ ]+) DESC/) { |
3367 |
|
($keyName, $ordering) = ($1, "descending"); |
3368 |
|
} else { |
3369 |
|
($keyName, $ordering) = ($keyData, "ascending"); |
3370 |
|
} |
3371 |
|
# Find the key's position and type. |
3372 |
|
my $fieldSpec; |
3373 |
|
for (my $i = 0; $i <= $#fields && ! $fieldSpec; $i++) { |
3374 |
|
my $thisField = $fields[$i]; |
3375 |
|
if ($thisField->{name} eq $keyName) { |
3376 |
|
# Get the sort modifier for this field type. The modifier |
3377 |
|
# decides whether we're using a character, numeric, or |
3378 |
|
# floating-point sort. |
3379 |
|
my $modifier = $TypeTable{$thisField->{type}}->{sort}; |
3380 |
|
# If the index is descending for this field, denote we want |
3381 |
|
# to reverse the sort order on this field. |
3382 |
|
if ($ordering eq 'descending') { |
3383 |
|
$modifier .= "r"; |
3384 |
|
} |
3385 |
|
# Store the position and modifier into the field spec, which |
3386 |
|
# will stop the inner loop. Note that the field number is |
3387 |
|
# 1-based in the sort command, so we have to increment the |
3388 |
|
# index. |
3389 |
|
$fieldSpec = ($i + 1) . $modifier; |
3390 |
|
} |
3391 |
} |
} |
3392 |
|
# Add this field to the sort command. |
3393 |
|
$retVal .= " -k$fieldSpec"; |
3394 |
} |
} |
3395 |
# Return the result. |
# Return the result. |
3396 |
return $retVal; |
return $retVal; |